// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/**
 * DOC: Base kernel memory APIs
 */
#include <linux/dma-buf.h>
#include <linux/kernel.h>
#include <linux/bug.h>
#include <linux/compat.h>
#include <linux/version.h>
#include <linux/log2.h>
#if IS_ENABLED(CONFIG_OF)
#include <linux/of_platform.h>
#endif

#include <mali_kbase_config.h>
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
#include <mali_kbase_cache_policy.h>
#include <mali_kbase_hw.h>
#include <tl/mali_kbase_tracepoints.h>
#include <mali_kbase_native_mgm.h>
#include <mali_kbase_mem_pool_group.h>
#include <mmu/mali_kbase_mmu.h>
#include <mali_kbase_config_defaults.h>
#include <mali_kbase_trace_gpu_mem.h>

#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)

#if MALI_JIT_PRESSURE_LIMIT_BASE

/*
 * Alignment of objects allocated by the GPU inside a just-in-time memory
 * region whose size is given by an end address
 *
 * This is the alignment of objects allocated by the GPU, but possibly not
 * fully written to. When taken into account with
 * KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES it gives the maximum number of bytes
 * that the JIT memory report size can exceed the actual backed memory size.
 */
#define KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES (128u)

/*
 * Maximum size of objects allocated by the GPU inside a just-in-time memory
 * region whose size is given by an end address
 *
 * This is the maximum size of objects allocated by the GPU, but possibly not
 * fully written to. When taken into account with
 * KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES it gives the maximum number of bytes
 * that the JIT memory report size can exceed the actual backed memory size.
 */
#define KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES (512u)

#endif /* MALI_JIT_PRESSURE_LIMIT_BASE */

/* Forward declarations */
static void free_partial_locked(struct kbase_context *kctx,
		struct kbase_mem_pool *pool, struct tagged_addr tp);

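/* Return the number of usable CPU virtual address bits for this context.
 * Compat (32-bit) contexts are always limited to 32 bits.
 */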
static size_t kbase_get_num_cpu_va_bits(struct kbase_context *kctx)
{
#if defined(CONFIG_ARM64)
	/* VA_BITS can be as high as 48 bits, but all bits are available for
	 * both user and kernel.
	 */
	size_t cpu_va_bits = VA_BITS;
#elif defined(CONFIG_X86_64)
	/* x86_64 can access 48 bits of VA, but the 48th is used to denote
	 * kernel (1) vs userspace (0), so the max here is 47.
	 */
	size_t cpu_va_bits = 47;
#elif defined(CONFIG_ARM) || defined(CONFIG_X86_32)
	size_t cpu_va_bits = sizeof(void *) * BITS_PER_BYTE;
#else
#error "Unknown CPU VA width for this architecture"
#endif

	if (kbase_ctx_compat_mode(kctx))
		cpu_va_bits = 32;

	return cpu_va_bits;
}

/* Determine which RB tree the given GPU VA pfn belongs to, based on the
 * memory zone that the pfn falls in.
 */
static struct rb_root *kbase_gpu_va_to_rbtree(struct kbase_context *kctx,
					      u64 gpu_pfn)
{
	struct rb_root *rbtree = NULL;

	struct kbase_reg_zone *exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);

#if MALI_USE_CSF
	struct kbase_reg_zone *fixed_va_zone =
		kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_FIXED_VA);

	struct kbase_reg_zone *exec_fixed_va_zone =
		kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA);

	if (gpu_pfn >= fixed_va_zone->base_pfn) {
		rbtree = &kctx->reg_rbtree_fixed;
		return rbtree;
	} else if (gpu_pfn >= exec_fixed_va_zone->base_pfn) {
		rbtree = &kctx->reg_rbtree_exec_fixed;
		return rbtree;
	}
#endif
	if (gpu_pfn >= exec_va_zone->base_pfn)
		rbtree = &kctx->reg_rbtree_exec;
	else {
		u64 same_va_end;

		if (kbase_ctx_compat_mode(kctx)) {
			same_va_end = KBASE_REG_ZONE_CUSTOM_VA_BASE;
		} else {
			struct kbase_reg_zone *same_va_zone =
				kbase_ctx_reg_zone_get(kctx,
						       KBASE_REG_ZONE_SAME_VA);
			same_va_end = kbase_reg_zone_end_pfn(same_va_zone);
		}

		if (gpu_pfn >= same_va_end)
			rbtree = &kctx->reg_rbtree_custom;
		else
			rbtree = &kctx->reg_rbtree_same;
	}

	return rbtree;
}

/* This function inserts a region into the tree. */
static void kbase_region_tracker_insert(struct kbase_va_region *new_reg)
{
	u64 start_pfn = new_reg->start_pfn;
	struct rb_node **link = NULL;
	struct rb_node *parent = NULL;
	struct rb_root *rbtree = NULL;

	rbtree = new_reg->rbtree;

	link = &(rbtree->rb_node);
	/* Find the right place in the tree using tree search */
	while (*link) {
		struct kbase_va_region *old_reg;

		parent = *link;
		old_reg = rb_entry(parent, struct kbase_va_region, rblink);

		/* RBTree requires no duplicate entries. */
		KBASE_DEBUG_ASSERT(old_reg->start_pfn != start_pfn);

		if (old_reg->start_pfn > start_pfn)
			link = &(*link)->rb_left;
		else
			link = &(*link)->rb_right;
	}

	/* Put the new node there, and rebalance tree */
	rb_link_node(&(new_reg->rblink), parent, link);

	rb_insert_color(&(new_reg->rblink), rbtree);
}

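/* Walk the tree for the region that fully encloses the page range
 * [start_pfn, start_pfn + nr_pages), or return NULL if no such region exists.
 */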
static struct kbase_va_region *find_region_enclosing_range_rbtree(
	struct rb_root *rbtree, u64 start_pfn, size_t nr_pages)
{
	struct rb_node *rbnode;
	struct kbase_va_region *reg;
	u64 end_pfn = start_pfn + nr_pages;

	rbnode = rbtree->rb_node;

	while (rbnode) {
		u64 tmp_start_pfn, tmp_end_pfn;

		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
		tmp_start_pfn = reg->start_pfn;
		tmp_end_pfn = reg->start_pfn + reg->nr_pages;

		/* If start is lower than this, go left. */
		if (start_pfn < tmp_start_pfn)
			rbnode = rbnode->rb_left;
		/* If end is higher than this, then go right. */
		else if (end_pfn > tmp_end_pfn)
			rbnode = rbnode->rb_right;
		else	/* Enclosing */
			return reg;
	}

	return NULL;
}

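/* Find the region in @rbtree that encloses the single GPU address @gpu_addr,
 * or return NULL if the address is not covered by any region.
 */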
struct kbase_va_region *kbase_find_region_enclosing_address(
	struct rb_root *rbtree, u64 gpu_addr)
{
	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
	struct rb_node *rbnode;
	struct kbase_va_region *reg;

	rbnode = rbtree->rb_node;

	while (rbnode) {
		u64 tmp_start_pfn, tmp_end_pfn;

		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
		tmp_start_pfn = reg->start_pfn;
		tmp_end_pfn = reg->start_pfn + reg->nr_pages;

		/* If start is lower than this, go left. */
		if (gpu_pfn < tmp_start_pfn)
			rbnode = rbnode->rb_left;
		/* If end is higher than this, then go right. */
		else if (gpu_pfn >= tmp_end_pfn)
			rbnode = rbnode->rb_right;
		else	/* Enclosing */
			return reg;
	}

	return NULL;
}

/* Find region enclosing given address. */
struct kbase_va_region *kbase_region_tracker_find_region_enclosing_address(
	struct kbase_context *kctx, u64 gpu_addr)
{
	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
	struct rb_root *rbtree = NULL;

	KBASE_DEBUG_ASSERT(kctx != NULL);

	lockdep_assert_held(&kctx->reg_lock);

	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);

	return kbase_find_region_enclosing_address(rbtree, gpu_addr);
}

KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_enclosing_address);

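/* Find the region in @rbtree whose start PFN exactly matches @gpu_addr,
 * or return NULL if there is no region based at that address.
 */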
struct kbase_va_region *kbase_find_region_base_address(
	struct rb_root *rbtree, u64 gpu_addr)
{
	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
	struct rb_node *rbnode = NULL;
	struct kbase_va_region *reg = NULL;

	rbnode = rbtree->rb_node;

	while (rbnode) {
		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
		if (reg->start_pfn > gpu_pfn)
			rbnode = rbnode->rb_left;
		else if (reg->start_pfn < gpu_pfn)
			rbnode = rbnode->rb_right;
		else
			return reg;
	}

	return NULL;
}

/* Find region with given base address */
struct kbase_va_region *kbase_region_tracker_find_region_base_address(
	struct kbase_context *kctx, u64 gpu_addr)
{
	u64 gpu_pfn = gpu_addr >> PAGE_SHIFT;
	struct rb_root *rbtree = NULL;

	lockdep_assert_held(&kctx->reg_lock);

	rbtree = kbase_gpu_va_to_rbtree(kctx, gpu_pfn);

	return kbase_find_region_base_address(rbtree, gpu_addr);
}

KBASE_EXPORT_TEST_API(kbase_region_tracker_find_region_base_address);

/* Find region meeting given requirements */
static struct kbase_va_region *kbase_region_tracker_find_region_meeting_reqs(
	struct kbase_va_region *reg_reqs,
	size_t nr_pages, size_t align_offset, size_t align_mask,
	u64 *out_start_pfn)
{
	struct rb_node *rbnode = NULL;
	struct kbase_va_region *reg = NULL;
	struct rb_root *rbtree = NULL;

	/* Note that this is a linear search: we have no target address in
	 * mind, so it does not benefit from the rbtree search.
	 */
	rbtree = reg_reqs->rbtree;

	for (rbnode = rb_first(rbtree); rbnode; rbnode = rb_next(rbnode)) {
		reg = rb_entry(rbnode, struct kbase_va_region, rblink);
		if ((reg->nr_pages >= nr_pages) &&
		    (reg->flags & KBASE_REG_FREE)) {
			/* Check alignment */
			u64 start_pfn = reg->start_pfn;

			/* When align_offset == align, this sequence is
			 * equivalent to:
			 *   (start_pfn + align_mask) & ~(align_mask)
			 *
			 * Otherwise, it aligns to n*align + offset, for the
			 * lowest value n that makes this still >start_pfn
			 */
			start_pfn += align_mask;
			start_pfn -= (start_pfn - align_offset) & (align_mask);

			if (!(reg_reqs->flags & KBASE_REG_GPU_NX)) {
				/* Can't end at 4GB boundary */
				if (0 == ((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB))
					start_pfn += align_offset;

				/* Can't start at 4GB boundary */
				if (0 == (start_pfn & BASE_MEM_PFN_MASK_4GB))
					start_pfn += align_offset;

				if (!((start_pfn + nr_pages) & BASE_MEM_PFN_MASK_4GB) ||
				    !(start_pfn & BASE_MEM_PFN_MASK_4GB))
					continue;
			} else if (reg_reqs->flags &
					KBASE_REG_GPU_VA_SAME_4GB_PAGE) {
				u64 end_pfn = start_pfn + nr_pages - 1;

				if ((start_pfn & ~BASE_MEM_PFN_MASK_4GB) !=
				    (end_pfn & ~BASE_MEM_PFN_MASK_4GB))
					start_pfn = end_pfn & ~BASE_MEM_PFN_MASK_4GB;
			}

			if ((start_pfn >= reg->start_pfn) &&
			    (start_pfn <= (reg->start_pfn + reg->nr_pages - 1)) &&
			    ((start_pfn + nr_pages - 1) <= (reg->start_pfn + reg->nr_pages - 1))) {
				*out_start_pfn = start_pfn;
				return reg;
			}
		}
	}

	return NULL;
}

/**
 * kbase_remove_va_region - Remove a region object from the global list.
 *
 * @kbdev: The kbase device
 * @reg: Region object to remove
 *
 * The region reg is removed, possibly by merging with other free and
 * compatible adjacent regions. It must be called with the context
 * region lock held. The associated memory is not released (see
 * kbase_free_alloced_region). Internal use only.
 */
void kbase_remove_va_region(struct kbase_device *kbdev,
			    struct kbase_va_region *reg)
{
	struct rb_node *rbprev;
	struct kbase_va_region *prev = NULL;
	struct rb_node *rbnext;
	struct kbase_va_region *next = NULL;
	struct rb_root *reg_rbtree = NULL;
	struct kbase_va_region *orig_reg = reg;

	int merged_front = 0;
	int merged_back = 0;

	reg_rbtree = reg->rbtree;

	if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
		return;

	/* Try to merge with the previous block first */
	rbprev = rb_prev(&(reg->rblink));
	if (rbprev) {
		prev = rb_entry(rbprev, struct kbase_va_region, rblink);
		if (prev->flags & KBASE_REG_FREE) {
			/* We're compatible with the previous VMA, merge with
			 * it, handling any gaps for robustness.
			 */
			u64 prev_end_pfn = prev->start_pfn + prev->nr_pages;

			WARN_ON((prev->flags & KBASE_REG_ZONE_MASK) !=
				(reg->flags & KBASE_REG_ZONE_MASK));
			if (!WARN_ON(reg->start_pfn < prev_end_pfn))
				prev->nr_pages += reg->start_pfn - prev_end_pfn;
			prev->nr_pages += reg->nr_pages;
			rb_erase(&(reg->rblink), reg_rbtree);
			reg = prev;
			merged_front = 1;
		}
	}

	/* Try to merge with the next block second */
	/* Note we do the lookup here as the tree may have been rebalanced. */
	rbnext = rb_next(&(reg->rblink));
	if (rbnext) {
		next = rb_entry(rbnext, struct kbase_va_region, rblink);
		if (next->flags & KBASE_REG_FREE) {
			/* We're compatible with the next VMA, merge with it,
			 * handling any gaps for robustness.
			 */
			u64 reg_end_pfn = reg->start_pfn + reg->nr_pages;

			WARN_ON((next->flags & KBASE_REG_ZONE_MASK) !=
				(reg->flags & KBASE_REG_ZONE_MASK));
			if (!WARN_ON(next->start_pfn < reg_end_pfn))
				next->nr_pages += next->start_pfn - reg_end_pfn;
			next->start_pfn = reg->start_pfn;
			next->nr_pages += reg->nr_pages;
			rb_erase(&(reg->rblink), reg_rbtree);
			merged_back = 1;
		}
	}

	if (merged_front && merged_back) {
		/* We already merged with prev, free it */
		kfree(reg);
	} else if (!(merged_front || merged_back)) {
		/* If we failed to merge then we need to add a new block */

		/*
		 * We didn't merge anything. Try to add a new free
		 * placeholder, and in any case, remove the original one.
		 */
		struct kbase_va_region *free_reg;

		free_reg = kbase_alloc_free_region(kbdev, reg_rbtree, reg->start_pfn, reg->nr_pages,
						   reg->flags & KBASE_REG_ZONE_MASK);
		if (!free_reg) {
			/* In case of failure, we cannot allocate a replacement
			 * free region, so we will be left with a 'gap' in the
			 * region tracker's address range (though, the rbtree
			 * will itself still be correct after erasing
			 * 'reg').
			 *
			 * The gap will be rectified when an adjacent region is
			 * removed by one of the above merging paths. Other
			 * paths will gracefully fail to allocate if they try
			 * to allocate in the gap.
			 *
			 * There is nothing that the caller can do, since free
			 * paths must not fail. The existing 'reg' cannot be
			 * repurposed as the free region as callers must have
			 * freedom of use with it by virtue of it being owned
			 * by them, not the region tracker insert/remove code.
			 */
			dev_warn(
				kbdev->dev,
				"Could not alloc a replacement free region for 0x%.16llx..0x%.16llx",
				(unsigned long long)reg->start_pfn << PAGE_SHIFT,
				(unsigned long long)(reg->start_pfn + reg->nr_pages) << PAGE_SHIFT);
			rb_erase(&(reg->rblink), reg_rbtree);

			goto out;
		}
		rb_replace_node(&(reg->rblink), &(free_reg->rblink), reg_rbtree);
	}

	/* This operation is always safe because the function never frees
	 * the region. If the region has been merged to both front and back,
	 * then it's the previous region that is supposed to be freed.
	 */
	orig_reg->start_pfn = 0;

out:
	return;
}

KBASE_EXPORT_TEST_API(kbase_remove_va_region);

/**
 * kbase_insert_va_region_nolock - Insert a VA region to the list,
 * replacing the existing one.
 *
 * @kbdev: The kbase device
 * @new_reg: The new region to insert
 * @at_reg: The region to replace
 * @start_pfn: The Page Frame Number to insert at
 * @nr_pages: The number of pages of the region
 *
 * Return: 0 on success, error code otherwise.
 */
static int kbase_insert_va_region_nolock(struct kbase_device *kbdev,
					 struct kbase_va_region *new_reg,
					 struct kbase_va_region *at_reg, u64 start_pfn,
					 size_t nr_pages)
{
	struct rb_root *reg_rbtree = NULL;
	int err = 0;

	reg_rbtree = at_reg->rbtree;

	/* Must be a free region */
	KBASE_DEBUG_ASSERT((at_reg->flags & KBASE_REG_FREE) != 0);
	/* start_pfn should be contained within at_reg */
	KBASE_DEBUG_ASSERT((start_pfn >= at_reg->start_pfn) && (start_pfn < at_reg->start_pfn + at_reg->nr_pages));
	/* at least nr_pages from start_pfn should be contained within at_reg */
	KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= at_reg->start_pfn + at_reg->nr_pages);
	/* having at_reg means the rb_tree should not be empty */
	if (WARN_ON(RB_EMPTY_ROOT(reg_rbtree)))
		return -ENOMEM;

	new_reg->start_pfn = start_pfn;
	new_reg->nr_pages = nr_pages;

	/* Regions are a whole use, so swap and delete old one. */
	if (at_reg->start_pfn == start_pfn && at_reg->nr_pages == nr_pages) {
		rb_replace_node(&(at_reg->rblink), &(new_reg->rblink),
				reg_rbtree);
		kfree(at_reg);
	}
	/* New region replaces the start of the old one, so insert before. */
	else if (at_reg->start_pfn == start_pfn) {
		at_reg->start_pfn += nr_pages;
		KBASE_DEBUG_ASSERT(at_reg->nr_pages >= nr_pages);
		at_reg->nr_pages -= nr_pages;

		kbase_region_tracker_insert(new_reg);
	}
	/* New region replaces the end of the old one, so insert after. */
	else if ((at_reg->start_pfn + at_reg->nr_pages) == (start_pfn + nr_pages)) {
		at_reg->nr_pages -= nr_pages;

		kbase_region_tracker_insert(new_reg);
	}
	/* New region splits the old one, so insert and create new */
	else {
		struct kbase_va_region *new_front_reg;

		new_front_reg = kbase_alloc_free_region(kbdev, reg_rbtree, at_reg->start_pfn,
							start_pfn - at_reg->start_pfn,
							at_reg->flags & KBASE_REG_ZONE_MASK);

		if (new_front_reg) {
			at_reg->nr_pages -= nr_pages + new_front_reg->nr_pages;
			at_reg->start_pfn = start_pfn + nr_pages;

			kbase_region_tracker_insert(new_front_reg);
			kbase_region_tracker_insert(new_reg);
		} else {
			err = -ENOMEM;
		}
	}

	return err;
}

/**
 * kbase_add_va_region - Add a VA region to the region list for a context.
 *
 * @kctx: kbase context containing the region
 * @reg: the region to add
 * @addr: the address to insert the region at
 * @nr_pages: the number of pages in the region
 * @align: the minimum alignment in pages
 *
 * Return: 0 on success, error code otherwise.
 */
int kbase_add_va_region(struct kbase_context *kctx,
			struct kbase_va_region *reg, u64 addr,
			size_t nr_pages, size_t align)
{
	int err = 0;
	struct kbase_device *kbdev = kctx->kbdev;
	int cpu_va_bits = kbase_get_num_cpu_va_bits(kctx);
	int gpu_pc_bits =
		kbdev->gpu_props.props.core_props.log2_program_counter_size;

	KBASE_DEBUG_ASSERT(kctx != NULL);
	KBASE_DEBUG_ASSERT(reg != NULL);

	lockdep_assert_held(&kctx->reg_lock);

	/* The executable allocation from the SAME_VA zone should already have an
	 * appropriately aligned GPU VA chosen for it.
	 * Also, executable allocations from EXEC_VA don't need the special
	 * alignment.
	 */
#if MALI_USE_CSF
	/* The same is also true for the EXEC_FIXED_VA zone.
	 */
#endif
	if (!(reg->flags & KBASE_REG_GPU_NX) && !addr &&
#if MALI_USE_CSF
	    ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_FIXED_VA) &&
#endif
	    ((reg->flags & KBASE_REG_ZONE_MASK) != KBASE_REG_ZONE_EXEC_VA)) {
		if (cpu_va_bits > gpu_pc_bits) {
			align = max(align, (size_t)((1ULL << gpu_pc_bits)
						>> PAGE_SHIFT));
		}
	}

	do {
		err = kbase_add_va_region_rbtree(kbdev, reg, addr, nr_pages,
						 align);
		if (err != -ENOMEM)
			break;

		/*
		 * If the allocation is not from the same zone as JIT
		 * then don't retry, we're out of VA and there is
		 * nothing which can be done about it.
		 */
		if ((reg->flags & KBASE_REG_ZONE_MASK) !=
				KBASE_REG_ZONE_CUSTOM_VA)
			break;
	} while (kbase_jit_evict(kctx));

	return err;
}

KBASE_EXPORT_TEST_API(kbase_add_va_region);

/**
 * kbase_add_va_region_rbtree - Insert a region into its corresponding rbtree
 *
 * @kbdev: The kbase device
 * @reg: The region to add
 * @addr: The address to add the region at, or 0 to map at any available address
 * @nr_pages: The size of the region in pages
 * @align: The minimum alignment in pages
 *
 * Insert a region into the rbtree that was specified when the region was
 * created. If addr is 0 a free area in the rbtree is used, otherwise the
 * specified address is used.
 *
 * Return: 0 on success, error code otherwise.
 */
int kbase_add_va_region_rbtree(struct kbase_device *kbdev,
			       struct kbase_va_region *reg,
			       u64 addr, size_t nr_pages, size_t align)
{
	struct device *const dev = kbdev->dev;
	struct rb_root *rbtree = NULL;
	struct kbase_va_region *tmp;
	u64 gpu_pfn = addr >> PAGE_SHIFT;
	int err = 0;

	rbtree = reg->rbtree;

	if (!align)
		align = 1;

	/* must be a power of 2 */
	KBASE_DEBUG_ASSERT(is_power_of_2(align));
	KBASE_DEBUG_ASSERT(nr_pages > 0);

	/* Path 1: Map a specific address. Find the enclosing region,
	 * which *must* be free.
	 */
	if (gpu_pfn) {
		KBASE_DEBUG_ASSERT(!(gpu_pfn & (align - 1)));

		tmp = find_region_enclosing_range_rbtree(rbtree, gpu_pfn,
							 nr_pages);
		if (kbase_is_region_invalid(tmp)) {
			dev_warn(dev, "Enclosing region not found or invalid: 0x%08llx gpu_pfn, %zu nr_pages", gpu_pfn, nr_pages);
			err = -ENOMEM;
			goto exit;
		} else if (!kbase_is_region_free(tmp)) {
			dev_warn(dev, "!(tmp->flags & KBASE_REG_FREE): tmp->start_pfn=0x%llx tmp->flags=0x%lx tmp->nr_pages=0x%zx gpu_pfn=0x%llx nr_pages=0x%zx\n",
				 tmp->start_pfn, tmp->flags,
				 tmp->nr_pages, gpu_pfn, nr_pages);
			err = -ENOMEM;
			goto exit;
		}

		err = kbase_insert_va_region_nolock(kbdev, reg, tmp, gpu_pfn, nr_pages);
		if (err) {
			dev_warn(dev, "Failed to insert va region");
			err = -ENOMEM;
		}
	} else {
		/* Path 2: Map any free address which meets the requirements. */
		u64 start_pfn;
		size_t align_offset = align;
		size_t align_mask = align - 1;

#if !MALI_USE_CSF
		if ((reg->flags & KBASE_REG_TILER_ALIGN_TOP)) {
			WARN(align > 1, "%s with align %lx might not be honored for KBASE_REG_TILER_ALIGN_TOP memory",
			     __func__,
			     (unsigned long)align);
			align_mask = reg->extension - 1;
			align_offset = reg->extension - reg->initial_commit;
		}
#endif /* !MALI_USE_CSF */

		tmp = kbase_region_tracker_find_region_meeting_reqs(reg,
				nr_pages, align_offset, align_mask,
				&start_pfn);
		if (tmp) {
			err = kbase_insert_va_region_nolock(kbdev, reg, tmp, start_pfn, nr_pages);
			if (unlikely(err)) {
				dev_warn(dev, "Failed to insert region: 0x%08llx start_pfn, %zu nr_pages",
					 start_pfn, nr_pages);
			}
		} else {
			dev_dbg(dev, "Failed to find a suitable region: %zu nr_pages, %zu align_offset, %zu align_mask\n",
				nr_pages, align_offset, align_mask);
			err = -ENOMEM;
		}
	}

exit:
	return err;
}

/*
 * @brief Initialize the internal region tracker data structure.
 */
#if MALI_USE_CSF
static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
					 struct kbase_va_region *same_va_reg,
					 struct kbase_va_region *custom_va_reg,
					 struct kbase_va_region *exec_va_reg,
					 struct kbase_va_region *exec_fixed_va_reg,
					 struct kbase_va_region *fixed_va_reg)
{
	u64 last_zone_end_pfn;

	kctx->reg_rbtree_same = RB_ROOT;
	kbase_region_tracker_insert(same_va_reg);

	last_zone_end_pfn = same_va_reg->start_pfn + same_va_reg->nr_pages;

	/* Although custom_va_reg doesn't always exist, initialize
	 * unconditionally because of the mem_view debugfs
	 * implementation which relies on it being empty.
	 */
	kctx->reg_rbtree_custom = RB_ROOT;
	kctx->reg_rbtree_exec = RB_ROOT;

	if (custom_va_reg) {
		WARN_ON(custom_va_reg->start_pfn < last_zone_end_pfn);
		kbase_region_tracker_insert(custom_va_reg);
		last_zone_end_pfn = custom_va_reg->start_pfn + custom_va_reg->nr_pages;
	}

	/* Initialize exec, fixed and exec_fixed. These are always
	 * initialized at this stage, if they will exist at all.
	 */
	kctx->reg_rbtree_fixed = RB_ROOT;
	kctx->reg_rbtree_exec_fixed = RB_ROOT;

	if (exec_va_reg) {
		WARN_ON(exec_va_reg->start_pfn < last_zone_end_pfn);
		kbase_region_tracker_insert(exec_va_reg);
		last_zone_end_pfn = exec_va_reg->start_pfn + exec_va_reg->nr_pages;
	}

	if (exec_fixed_va_reg) {
		WARN_ON(exec_fixed_va_reg->start_pfn < last_zone_end_pfn);
		kbase_region_tracker_insert(exec_fixed_va_reg);
		last_zone_end_pfn = exec_fixed_va_reg->start_pfn + exec_fixed_va_reg->nr_pages;
	}

	if (fixed_va_reg) {
		WARN_ON(fixed_va_reg->start_pfn < last_zone_end_pfn);
		kbase_region_tracker_insert(fixed_va_reg);
		last_zone_end_pfn = fixed_va_reg->start_pfn + fixed_va_reg->nr_pages;
	}
}
#else
static void kbase_region_tracker_ds_init(struct kbase_context *kctx,
					 struct kbase_va_region *same_va_reg,
					 struct kbase_va_region *custom_va_reg)
{
	kctx->reg_rbtree_same = RB_ROOT;
	kbase_region_tracker_insert(same_va_reg);

	/* Although custom_va_reg and exec_va_reg don't always exist,
	 * initialize unconditionally because of the mem_view debugfs
	 * implementation which relies on them being empty.
	 *
	 * The difference between the two is that the EXEC_VA region
	 * is never initialized at this stage.
	 */
	kctx->reg_rbtree_custom = RB_ROOT;
	kctx->reg_rbtree_exec = RB_ROOT;

	if (custom_va_reg)
		kbase_region_tracker_insert(custom_va_reg);
}
#endif /* MALI_USE_CSF */

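/* Map a region back to its owning kbase_context: use the zone bits in
 * reg->flags to identify which per-context rbtree the region lives in, then
 * container_of() on that rbtree pointer. Returns NULL for zones that are not
 * owned by a context (e.g. MCU_SHARED).
 */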
static struct kbase_context *kbase_reg_flags_to_kctx(struct kbase_va_region *reg)
{
	struct kbase_context *kctx = NULL;
	struct rb_root *rbtree = reg->rbtree;

	switch (reg->flags & KBASE_REG_ZONE_MASK) {
	case KBASE_REG_ZONE_CUSTOM_VA:
		kctx = container_of(rbtree, struct kbase_context, reg_rbtree_custom);
		break;
	case KBASE_REG_ZONE_SAME_VA:
		kctx = container_of(rbtree, struct kbase_context, reg_rbtree_same);
		break;
	case KBASE_REG_ZONE_EXEC_VA:
		kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec);
		break;
#if MALI_USE_CSF
	case KBASE_REG_ZONE_EXEC_FIXED_VA:
		kctx = container_of(rbtree, struct kbase_context, reg_rbtree_exec_fixed);
		break;
	case KBASE_REG_ZONE_FIXED_VA:
		kctx = container_of(rbtree, struct kbase_context, reg_rbtree_fixed);
		break;
	case KBASE_REG_ZONE_MCU_SHARED:
		/* This is only expected to be called on driver unload. */
		break;
#endif
	default:
		WARN(1, "Unknown zone in region: flags=0x%lx\n", reg->flags);
		break;
	}

	return kctx;
}

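/* Tear down every region left in @rbtree: unmap each region from the GPU if
 * page migration is enabled, then free the region object itself.
 */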
static void kbase_region_tracker_erase_rbtree(struct rb_root *rbtree)
{
	struct rb_node *rbnode;
	struct kbase_va_region *reg;

	do {
		rbnode = rb_first(rbtree);
		if (rbnode) {
			rb_erase(rbnode, rbtree);
			reg = rb_entry(rbnode, struct kbase_va_region, rblink);
			WARN_ON(kbase_refcount_read(&reg->va_refcnt) != 1);
			if (kbase_page_migration_enabled)
				kbase_gpu_munmap(kbase_reg_flags_to_kctx(reg), reg);
			/* Reset the start_pfn - as the rbtree is being
			 * destroyed and we've already erased this region, there
			 * is no further need to attempt to remove it.
			 * This won't affect the cleanup if the region was
			 * being used as a sticky resource, as the cleanup
			 * related to sticky resources anyway needs to be
			 * performed before the term of the region tracker.
			 */
			reg->start_pfn = 0;
			kbase_free_alloced_region(reg);
		}
	} while (rbnode);
}

void kbase_region_tracker_term(struct kbase_context *kctx)
{
	WARN(kctx->as_nr != KBASEP_AS_NR_INVALID,
	     "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before erasing remaining regions",
	     kctx->tgid, kctx->id);

	kbase_gpu_vm_lock(kctx);
	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_same);
	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_custom);
	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec);
#if MALI_USE_CSF
	WARN_ON(!list_empty(&kctx->csf.event_pages_head));
	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_exec_fixed);
	kbase_region_tracker_erase_rbtree(&kctx->reg_rbtree_fixed);

#endif
	kbase_gpu_vm_unlock(kctx);
}

void kbase_region_tracker_term_rbtree(struct rb_root *rbtree)
{
	kbase_region_tracker_erase_rbtree(rbtree);
}

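/* The SAME_VA zone is addressable by both CPU and GPU, so it is limited by
 * whichever of the two has the narrower VA width.
 */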
static size_t kbase_get_same_va_bits(struct kbase_context *kctx)
{
	return min_t(size_t, kbase_get_num_cpu_va_bits(kctx),
		     kctx->kbdev->gpu_props.mmu.va_bits);
}

int kbase_region_tracker_init(struct kbase_context *kctx)
{
	struct kbase_va_region *same_va_reg;
	struct kbase_va_region *custom_va_reg = NULL;
	size_t same_va_bits = kbase_get_same_va_bits(kctx);
	u64 custom_va_size = KBASE_REG_ZONE_CUSTOM_VA_SIZE;
	u64 gpu_va_bits = kctx->kbdev->gpu_props.mmu.va_bits;
	u64 gpu_va_limit = (1ULL << gpu_va_bits) >> PAGE_SHIFT;
	u64 same_va_pages;
	u64 same_va_base = 1u;
	int err;
#if MALI_USE_CSF
	struct kbase_va_region *exec_va_reg;
	struct kbase_va_region *exec_fixed_va_reg;
	struct kbase_va_region *fixed_va_reg;

	u64 exec_va_base;
	u64 fixed_va_end;
	u64 exec_fixed_va_base;
	u64 fixed_va_base;
	u64 fixed_va_pages;
#endif

	/* Take the lock as kbase_free_alloced_region requires it */
	kbase_gpu_vm_lock(kctx);

	same_va_pages = (1ULL << (same_va_bits - PAGE_SHIFT)) - same_va_base;

#if MALI_USE_CSF
	if ((same_va_base + same_va_pages) > KBASE_REG_ZONE_EXEC_VA_BASE_64) {
		/* Depending on how the kernel is configured, it's possible (eg on aarch64) for
		 * same_va_bits to reach 48 bits. Cap same_va_pages so that the same_va zone
		 * doesn't cross into the exec_va zone.
		 */
		same_va_pages = KBASE_REG_ZONE_EXEC_VA_BASE_64 - same_va_base;
	}
#endif

	/* all have SAME_VA */
	same_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_same, same_va_base,
					      same_va_pages, KBASE_REG_ZONE_SAME_VA);

	if (!same_va_reg) {
		err = -ENOMEM;
		goto fail_unlock;
	}
	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_SAME_VA, same_va_base,
				same_va_pages);

	if (kbase_ctx_compat_mode(kctx)) {
		if (gpu_va_limit <= KBASE_REG_ZONE_CUSTOM_VA_BASE) {
			err = -EINVAL;
			goto fail_free_same_va;
		}
		/* If the current size of TMEM is out of range of the
		 * virtual address space addressable by the MMU then
		 * we should shrink it to fit
		 */
		if ((KBASE_REG_ZONE_CUSTOM_VA_BASE + KBASE_REG_ZONE_CUSTOM_VA_SIZE) >= gpu_va_limit)
			custom_va_size = gpu_va_limit - KBASE_REG_ZONE_CUSTOM_VA_BASE;

		custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom,
							KBASE_REG_ZONE_CUSTOM_VA_BASE,
							custom_va_size, KBASE_REG_ZONE_CUSTOM_VA);

		if (!custom_va_reg) {
			err = -ENOMEM;
			goto fail_free_same_va;
		}
		kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA,
					KBASE_REG_ZONE_CUSTOM_VA_BASE,
					custom_va_size);
	} else {
		custom_va_size = 0;
	}

#if MALI_USE_CSF
	/* The position of EXEC_VA depends on whether the client is 32-bit or 64-bit. */
	exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_64;

	/* Similarly the end of the FIXED_VA zone also depends on whether the client
	 * is 32 or 64-bits.
	 */
	fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_64;

	if (kbase_ctx_compat_mode(kctx)) {
		exec_va_base = KBASE_REG_ZONE_EXEC_VA_BASE_32;
		fixed_va_end = KBASE_REG_ZONE_FIXED_VA_END_32;
	}

	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, exec_va_base,
				KBASE_REG_ZONE_EXEC_VA_SIZE);

	exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_base,
					      KBASE_REG_ZONE_EXEC_VA_SIZE, KBASE_REG_ZONE_EXEC_VA);

	if (!exec_va_reg) {
		err = -ENOMEM;
		goto fail_free_custom_va;
	}

	exec_fixed_va_base = exec_va_base + KBASE_REG_ZONE_EXEC_VA_SIZE;

	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_FIXED_VA, exec_fixed_va_base,
				KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE);

	exec_fixed_va_reg =
		kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec_fixed,
					exec_fixed_va_base, KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE,
					KBASE_REG_ZONE_EXEC_FIXED_VA);

	if (!exec_fixed_va_reg) {
		err = -ENOMEM;
		goto fail_free_exec_va;
	}

	fixed_va_base = exec_fixed_va_base + KBASE_REG_ZONE_EXEC_FIXED_VA_SIZE;
	fixed_va_pages = fixed_va_end - fixed_va_base;

	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_FIXED_VA, fixed_va_base, fixed_va_pages);

	fixed_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_fixed, fixed_va_base,
					       fixed_va_pages, KBASE_REG_ZONE_FIXED_VA);

	kctx->gpu_va_end = fixed_va_end;

	if (!fixed_va_reg) {
		err = -ENOMEM;
		goto fail_free_exec_fixed_va;
	}

	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg, exec_va_reg,
				     exec_fixed_va_reg, fixed_va_reg);

	INIT_LIST_HEAD(&kctx->csf.event_pages_head);
#else
	/* EXEC_VA zone's codepaths are slightly easier when its base_pfn is
	 * initially U64_MAX
	 */
	kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_EXEC_VA, U64_MAX, 0u);
	/* Other zones are 0: kbase_create_context() uses vzalloc */

	kbase_region_tracker_ds_init(kctx, same_va_reg, custom_va_reg);
	kctx->gpu_va_end = same_va_base + same_va_pages + custom_va_size;
#endif
	kctx->jit_va = false;

	kbase_gpu_vm_unlock(kctx);
	return 0;

#if MALI_USE_CSF
fail_free_exec_fixed_va:
	kbase_free_alloced_region(exec_fixed_va_reg);
fail_free_exec_va:
	kbase_free_alloced_region(exec_va_reg);
fail_free_custom_va:
	if (custom_va_reg)
		kbase_free_alloced_region(custom_va_reg);
#endif

fail_free_same_va:
	kbase_free_alloced_region(same_va_reg);
fail_unlock:
	kbase_gpu_vm_unlock(kctx);
	return err;
}

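/* The EXEC_VA zone's base_pfn is left at the U64_MAX sentinel when the zone
 * has not been created yet (see kbase_region_tracker_init()), so a real base
 * address here means the zone exists.
 */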
kbase_has_exec_va_zone_locked(struct kbase_context * kctx)1067*4882a593Smuzhiyun static bool kbase_has_exec_va_zone_locked(struct kbase_context *kctx)
1068*4882a593Smuzhiyun {
1069*4882a593Smuzhiyun struct kbase_reg_zone *exec_va_zone;
1070*4882a593Smuzhiyun
1071*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
1072*4882a593Smuzhiyun exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
1073*4882a593Smuzhiyun
1074*4882a593Smuzhiyun return (exec_va_zone->base_pfn != U64_MAX);
1075*4882a593Smuzhiyun }
1076*4882a593Smuzhiyun
kbase_has_exec_va_zone(struct kbase_context * kctx)1077*4882a593Smuzhiyun bool kbase_has_exec_va_zone(struct kbase_context *kctx)
1078*4882a593Smuzhiyun {
1079*4882a593Smuzhiyun bool has_exec_va_zone;
1080*4882a593Smuzhiyun
1081*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
1082*4882a593Smuzhiyun has_exec_va_zone = kbase_has_exec_va_zone_locked(kctx);
1083*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
1084*4882a593Smuzhiyun
1085*4882a593Smuzhiyun return has_exec_va_zone;
1086*4882a593Smuzhiyun }
1087*4882a593Smuzhiyun
1088*4882a593Smuzhiyun /**
1089*4882a593Smuzhiyun * kbase_region_tracker_has_allocs - Determine if any allocations have been made
1090*4882a593Smuzhiyun * on a context's region tracker
1091*4882a593Smuzhiyun *
1092*4882a593Smuzhiyun * @kctx: KBase context
1093*4882a593Smuzhiyun *
1094*4882a593Smuzhiyun * Check the context to determine if any allocations have been made yet from
1095*4882a593Smuzhiyun * any of its zones. This check should be done before resizing a zone, e.g. to
1096*4882a593Smuzhiyun * make space to add a second zone.
1097*4882a593Smuzhiyun *
1098*4882a593Smuzhiyun * Whilst a zone without allocations can be resized whilst other zones have
1099*4882a593Smuzhiyun * allocations, we still check all of @kctx's zones anyway: this is a stronger
1100*4882a593Smuzhiyun * guarantee and should be adhered to when creating new zones.
1101*4882a593Smuzhiyun *
1102*4882a593Smuzhiyun * Allocations from kbdev zones are not counted.
1103*4882a593Smuzhiyun *
1104*4882a593Smuzhiyun * Return: true if any allocs exist on any zone, false otherwise
1105*4882a593Smuzhiyun */
1106*4882a593Smuzhiyun static bool kbase_region_tracker_has_allocs(struct kbase_context *kctx)
1107*4882a593Smuzhiyun {
1108*4882a593Smuzhiyun unsigned int zone_idx;
1109*4882a593Smuzhiyun
1110*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
1111*4882a593Smuzhiyun
1112*4882a593Smuzhiyun for (zone_idx = 0; zone_idx < KBASE_REG_ZONE_MAX; ++zone_idx) {
1113*4882a593Smuzhiyun struct kbase_reg_zone *zone;
1114*4882a593Smuzhiyun struct kbase_va_region *reg;
1115*4882a593Smuzhiyun u64 zone_base_addr;
1116*4882a593Smuzhiyun unsigned long zone_bits = KBASE_REG_ZONE(zone_idx);
1117*4882a593Smuzhiyun unsigned long reg_zone;
1118*4882a593Smuzhiyun
1119*4882a593Smuzhiyun if (!kbase_is_ctx_reg_zone(zone_bits))
1120*4882a593Smuzhiyun continue;
1121*4882a593Smuzhiyun zone = kbase_ctx_reg_zone_get(kctx, zone_bits);
1122*4882a593Smuzhiyun zone_base_addr = zone->base_pfn << PAGE_SHIFT;
1123*4882a593Smuzhiyun
1124*4882a593Smuzhiyun reg = kbase_region_tracker_find_region_base_address(
1125*4882a593Smuzhiyun kctx, zone_base_addr);
1126*4882a593Smuzhiyun
1127*4882a593Smuzhiyun if (!zone->va_size_pages) {
1128*4882a593Smuzhiyun WARN(reg,
1129*4882a593Smuzhiyun "Should not have found a region that starts at 0x%.16llx for zone 0x%lx",
1130*4882a593Smuzhiyun (unsigned long long)zone_base_addr, zone_bits);
1131*4882a593Smuzhiyun continue;
1132*4882a593Smuzhiyun }
1133*4882a593Smuzhiyun
1134*4882a593Smuzhiyun if (WARN(!reg,
1135*4882a593Smuzhiyun "There should always be a region that starts at 0x%.16llx for zone 0x%lx, couldn't find it",
1136*4882a593Smuzhiyun (unsigned long long)zone_base_addr, zone_bits))
1137*4882a593Smuzhiyun return true; /* Safest return value */
1138*4882a593Smuzhiyun
1139*4882a593Smuzhiyun reg_zone = reg->flags & KBASE_REG_ZONE_MASK;
1140*4882a593Smuzhiyun if (WARN(reg_zone != zone_bits,
1141*4882a593Smuzhiyun "The region that starts at 0x%.16llx should be in zone 0x%lx but was found in the wrong zone 0x%lx",
1142*4882a593Smuzhiyun (unsigned long long)zone_base_addr, zone_bits,
1143*4882a593Smuzhiyun reg_zone))
1144*4882a593Smuzhiyun return true; /* Safest return value */
1145*4882a593Smuzhiyun
1146*4882a593Smuzhiyun /* Unless the region is completely free and the same size as
1147*4882a593Smuzhiyun * the original zone, it has allocs
1148*4882a593Smuzhiyun */
1149*4882a593Smuzhiyun if ((!(reg->flags & KBASE_REG_FREE)) ||
1150*4882a593Smuzhiyun (reg->nr_pages != zone->va_size_pages))
1151*4882a593Smuzhiyun return true;
1152*4882a593Smuzhiyun }
1153*4882a593Smuzhiyun
1154*4882a593Smuzhiyun /* All zones are the same size as originally made, so there are no
1155*4882a593Smuzhiyun * allocs
1156*4882a593Smuzhiyun */
1157*4882a593Smuzhiyun return false;
1158*4882a593Smuzhiyun }
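/*
 * Illustrative call pattern (a minimal sketch, not copied from any real
 * caller): the check is only meaningful with the context's region lock
 * held, before a zone is carved up, exactly as the JIT and EXEC_VA setup
 * paths below do:
 *
 *   kbase_gpu_vm_lock(kctx);
 *   if (!kbase_region_tracker_has_allocs(kctx))
 *           err = resize_or_create_zone(kctx);   (hypothetical helper)
 *   kbase_gpu_vm_unlock(kctx);
 */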
1159*4882a593Smuzhiyun
1160*4882a593Smuzhiyun static int kbase_region_tracker_init_jit_64(struct kbase_context *kctx,
1161*4882a593Smuzhiyun u64 jit_va_pages)
1162*4882a593Smuzhiyun {
1163*4882a593Smuzhiyun struct kbase_va_region *same_va_reg;
1164*4882a593Smuzhiyun struct kbase_reg_zone *same_va_zone;
1165*4882a593Smuzhiyun u64 same_va_zone_base_addr;
1166*4882a593Smuzhiyun const unsigned long same_va_zone_bits = KBASE_REG_ZONE_SAME_VA;
1167*4882a593Smuzhiyun struct kbase_va_region *custom_va_reg;
1168*4882a593Smuzhiyun u64 jit_va_start;
1169*4882a593Smuzhiyun
1170*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
1171*4882a593Smuzhiyun
1172*4882a593Smuzhiyun /*
1173*4882a593Smuzhiyun * Modify the same VA free region after creation. The caller has
1174*4882a593Smuzhiyun * ensured that allocations haven't been made, as any allocations could
1175*4882a593Smuzhiyun * cause an overlap to happen with existing same VA allocations and the
1176*4882a593Smuzhiyun * custom VA zone.
1177*4882a593Smuzhiyun */
1178*4882a593Smuzhiyun same_va_zone = kbase_ctx_reg_zone_get(kctx, same_va_zone_bits);
1179*4882a593Smuzhiyun same_va_zone_base_addr = same_va_zone->base_pfn << PAGE_SHIFT;
1180*4882a593Smuzhiyun
1181*4882a593Smuzhiyun same_va_reg = kbase_region_tracker_find_region_base_address(
1182*4882a593Smuzhiyun kctx, same_va_zone_base_addr);
1183*4882a593Smuzhiyun if (WARN(!same_va_reg,
1184*4882a593Smuzhiyun "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
1185*4882a593Smuzhiyun (unsigned long long)same_va_zone_base_addr, same_va_zone_bits))
1186*4882a593Smuzhiyun return -ENOMEM;
1187*4882a593Smuzhiyun
1188*4882a593Smuzhiyun /* kbase_region_tracker_has_allocs() in the caller has already ensured
1189*4882a593Smuzhiyun * that all of the zones have no allocs, so no need to check that again
1190*4882a593Smuzhiyun * on same_va_reg
1191*4882a593Smuzhiyun */
1192*4882a593Smuzhiyun WARN_ON((!(same_va_reg->flags & KBASE_REG_FREE)) ||
1193*4882a593Smuzhiyun same_va_reg->nr_pages != same_va_zone->va_size_pages);
1194*4882a593Smuzhiyun
1195*4882a593Smuzhiyun if (same_va_reg->nr_pages < jit_va_pages ||
1196*4882a593Smuzhiyun same_va_zone->va_size_pages < jit_va_pages)
1197*4882a593Smuzhiyun return -ENOMEM;
1198*4882a593Smuzhiyun
1199*4882a593Smuzhiyun /* It's safe to adjust the same VA zone now */
1200*4882a593Smuzhiyun same_va_reg->nr_pages -= jit_va_pages;
1201*4882a593Smuzhiyun same_va_zone->va_size_pages -= jit_va_pages;
1202*4882a593Smuzhiyun jit_va_start = kbase_reg_zone_end_pfn(same_va_zone);
1203*4882a593Smuzhiyun
1204*4882a593Smuzhiyun /*
1205*4882a593Smuzhiyun * Create a custom VA zone at the end of the VA for allocations which
1206*4882a593Smuzhiyun * JIT can use so it doesn't have to allocate VA from the kernel.
1207*4882a593Smuzhiyun */
1208*4882a593Smuzhiyun custom_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_custom, jit_va_start,
1209*4882a593Smuzhiyun jit_va_pages, KBASE_REG_ZONE_CUSTOM_VA);
1210*4882a593Smuzhiyun
1211*4882a593Smuzhiyun /*
1212*4882a593Smuzhiyun * The context will be destroyed if we fail here so no point
1213*4882a593Smuzhiyun * reverting the change we made to same_va.
1214*4882a593Smuzhiyun */
1215*4882a593Smuzhiyun if (!custom_va_reg)
1216*4882a593Smuzhiyun return -ENOMEM;
1217*4882a593Smuzhiyun /* Since this is 64-bit, the custom zone will not have been
1218*4882a593Smuzhiyun * initialized, so initialize it now
1219*4882a593Smuzhiyun */
1220*4882a593Smuzhiyun kbase_ctx_reg_zone_init(kctx, KBASE_REG_ZONE_CUSTOM_VA, jit_va_start,
1221*4882a593Smuzhiyun jit_va_pages);
1222*4882a593Smuzhiyun
1223*4882a593Smuzhiyun kbase_region_tracker_insert(custom_va_reg);
1224*4882a593Smuzhiyun return 0;
1225*4882a593Smuzhiyun }
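/*
 * Worked example of the arithmetic above (values are illustrative): if the
 * SAME_VA zone initially covers PFNs [B, B + N) and jit_va_pages is J, then
 * after this function succeeds:
 *
 *   SAME_VA zone and its free region:  [B, B + N - J)   (shrunk by J pages)
 *   new CUSTOM_VA (JIT) zone:          [B + N - J, B + N)
 *
 * i.e. jit_va_start is the end PFN of the shrunk SAME_VA zone.
 */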
1226*4882a593Smuzhiyun
1227*4882a593Smuzhiyun int kbase_region_tracker_init_jit(struct kbase_context *kctx, u64 jit_va_pages,
1228*4882a593Smuzhiyun int max_allocations, int trim_level, int group_id,
1229*4882a593Smuzhiyun u64 phys_pages_limit)
1230*4882a593Smuzhiyun {
1231*4882a593Smuzhiyun int err = 0;
1232*4882a593Smuzhiyun
1233*4882a593Smuzhiyun if (trim_level < 0 || trim_level > BASE_JIT_MAX_TRIM_LEVEL)
1234*4882a593Smuzhiyun return -EINVAL;
1235*4882a593Smuzhiyun
1236*4882a593Smuzhiyun if (group_id < 0 || group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)
1237*4882a593Smuzhiyun return -EINVAL;
1238*4882a593Smuzhiyun
1239*4882a593Smuzhiyun if (phys_pages_limit > jit_va_pages)
1240*4882a593Smuzhiyun return -EINVAL;
1241*4882a593Smuzhiyun
1242*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
1243*4882a593Smuzhiyun if (phys_pages_limit != jit_va_pages)
1244*4882a593Smuzhiyun kbase_ctx_flag_set(kctx, KCTX_JPL_ENABLED);
1245*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
1246*4882a593Smuzhiyun
1247*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
1248*4882a593Smuzhiyun
1249*4882a593Smuzhiyun /* Verify that a JIT_VA zone has not been created already. */
1250*4882a593Smuzhiyun if (kctx->jit_va) {
1251*4882a593Smuzhiyun err = -EINVAL;
1252*4882a593Smuzhiyun goto exit_unlock;
1253*4882a593Smuzhiyun }
1254*4882a593Smuzhiyun
1255*4882a593Smuzhiyun /* For 64-bit clients we always look up the SAME_VA zone. To ensure it has
1256*4882a593Smuzhiyun * no allocs, we can ensure there are no allocs anywhere.
1257*4882a593Smuzhiyun *
1258*4882a593Smuzhiyun * This check is also useful in 32-bit, just to make sure init of the
1259*4882a593Smuzhiyun * zone is always done before any allocs.
1260*4882a593Smuzhiyun */
1261*4882a593Smuzhiyun if (kbase_region_tracker_has_allocs(kctx)) {
1262*4882a593Smuzhiyun err = -ENOMEM;
1263*4882a593Smuzhiyun goto exit_unlock;
1264*4882a593Smuzhiyun }
1265*4882a593Smuzhiyun
1266*4882a593Smuzhiyun if (!kbase_ctx_compat_mode(kctx))
1267*4882a593Smuzhiyun err = kbase_region_tracker_init_jit_64(kctx, jit_va_pages);
1268*4882a593Smuzhiyun /*
1269*4882a593Smuzhiyun * Nothing to do for 32-bit clients, JIT uses the existing
1270*4882a593Smuzhiyun * custom VA zone.
1271*4882a593Smuzhiyun */
1272*4882a593Smuzhiyun
1273*4882a593Smuzhiyun if (!err) {
1274*4882a593Smuzhiyun kctx->jit_max_allocations = max_allocations;
1275*4882a593Smuzhiyun kctx->trim_level = trim_level;
1276*4882a593Smuzhiyun kctx->jit_va = true;
1277*4882a593Smuzhiyun kctx->jit_group_id = group_id;
1278*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
1279*4882a593Smuzhiyun kctx->jit_phys_pages_limit = phys_pages_limit;
1280*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev, "phys_pages_limit set to %llu\n",
1281*4882a593Smuzhiyun phys_pages_limit);
1282*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
1283*4882a593Smuzhiyun }
1284*4882a593Smuzhiyun
1285*4882a593Smuzhiyun exit_unlock:
1286*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
1287*4882a593Smuzhiyun
1288*4882a593Smuzhiyun return err;
1289*4882a593Smuzhiyun }
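/*
 * Example invocation (a sketch only; the real callers live in the ioctl
 * handling code and the numbers are illustrative): a 64-bit client
 * reserving 65536 pages of JIT VA, at most 255 JIT allocations, no
 * trimming, the default memory group, and a physical page limit equal to
 * the VA reservation so that JIT pressure limiting stays disabled:
 *
 *   err = kbase_region_tracker_init_jit(kctx, 65536, 255, 0, 0, 65536);
 */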
1290*4882a593Smuzhiyun
1291*4882a593Smuzhiyun int kbase_region_tracker_init_exec(struct kbase_context *kctx, u64 exec_va_pages)
1292*4882a593Smuzhiyun {
1293*4882a593Smuzhiyun #if !MALI_USE_CSF
1294*4882a593Smuzhiyun struct kbase_va_region *exec_va_reg;
1295*4882a593Smuzhiyun struct kbase_reg_zone *exec_va_zone;
1296*4882a593Smuzhiyun struct kbase_reg_zone *target_zone;
1297*4882a593Smuzhiyun struct kbase_va_region *target_reg;
1298*4882a593Smuzhiyun u64 target_zone_base_addr;
1299*4882a593Smuzhiyun unsigned long target_zone_bits;
1300*4882a593Smuzhiyun u64 exec_va_start;
1301*4882a593Smuzhiyun int err;
1302*4882a593Smuzhiyun #endif
1303*4882a593Smuzhiyun
1304*4882a593Smuzhiyun /* The EXEC_VA zone shall be created by making space either:
1305*4882a593Smuzhiyun * - for 64-bit clients, at the end of the process's address space
1306*4882a593Smuzhiyun * - for 32-bit clients, in the CUSTOM zone
1307*4882a593Smuzhiyun *
1308*4882a593Smuzhiyun * Firstly, verify that the number of EXEC_VA pages requested by the
1309*4882a593Smuzhiyun * client is reasonable and then make sure that it is not greater than
1310*4882a593Smuzhiyun * the address space itself before calculating the base address of the
1311*4882a593Smuzhiyun * new zone.
1312*4882a593Smuzhiyun */
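/*
 * Resulting layout, sketched with illustrative symbols: if the target zone
 * (SAME_VA for 64-bit clients, CUSTOM_VA for 32-bit clients) ends at PFN E
 * and exec_va_pages is X, the code below places the EXEC_VA zone at
 * [E - X, E) and shrinks the target zone, and its free region, by X pages.
 */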
1313*4882a593Smuzhiyun if (exec_va_pages == 0 || exec_va_pages > KBASE_REG_ZONE_EXEC_VA_MAX_PAGES)
1314*4882a593Smuzhiyun return -EINVAL;
1315*4882a593Smuzhiyun
1316*4882a593Smuzhiyun #if MALI_USE_CSF
1317*4882a593Smuzhiyun /* For CSF GPUs we now set up the EXEC_VA zone during initialization,
1318*4882a593Smuzhiyun * so this request is a no-op.
1319*4882a593Smuzhiyun */
1320*4882a593Smuzhiyun return 0;
1321*4882a593Smuzhiyun #else
1322*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
1323*4882a593Smuzhiyun
1324*4882a593Smuzhiyun /* Verify that we've not already created an EXEC_VA zone, and that the
1325*4882a593Smuzhiyun * EXEC_VA zone comes before JIT's CUSTOM_VA zone.
1326*4882a593Smuzhiyun */
1327*4882a593Smuzhiyun if (kbase_has_exec_va_zone_locked(kctx) || kctx->jit_va) {
1328*4882a593Smuzhiyun err = -EPERM;
1329*4882a593Smuzhiyun goto exit_unlock;
1330*4882a593Smuzhiyun }
1331*4882a593Smuzhiyun
1332*4882a593Smuzhiyun if (exec_va_pages > kctx->gpu_va_end) {
1333*4882a593Smuzhiyun err = -ENOMEM;
1334*4882a593Smuzhiyun goto exit_unlock;
1335*4882a593Smuzhiyun }
1336*4882a593Smuzhiyun
1337*4882a593Smuzhiyun /* Verify no allocations have already been made */
1338*4882a593Smuzhiyun if (kbase_region_tracker_has_allocs(kctx)) {
1339*4882a593Smuzhiyun err = -ENOMEM;
1340*4882a593Smuzhiyun goto exit_unlock;
1341*4882a593Smuzhiyun }
1342*4882a593Smuzhiyun
1343*4882a593Smuzhiyun if (kbase_ctx_compat_mode(kctx)) {
1344*4882a593Smuzhiyun /* 32-bit client: take from CUSTOM_VA zone */
1345*4882a593Smuzhiyun target_zone_bits = KBASE_REG_ZONE_CUSTOM_VA;
1346*4882a593Smuzhiyun } else {
1347*4882a593Smuzhiyun /* 64-bit client: take from SAME_VA zone */
1348*4882a593Smuzhiyun target_zone_bits = KBASE_REG_ZONE_SAME_VA;
1349*4882a593Smuzhiyun }
1350*4882a593Smuzhiyun
1351*4882a593Smuzhiyun target_zone = kbase_ctx_reg_zone_get(kctx, target_zone_bits);
1352*4882a593Smuzhiyun target_zone_base_addr = target_zone->base_pfn << PAGE_SHIFT;
1353*4882a593Smuzhiyun
1354*4882a593Smuzhiyun target_reg = kbase_region_tracker_find_region_base_address(
1355*4882a593Smuzhiyun kctx, target_zone_base_addr);
1356*4882a593Smuzhiyun if (WARN(!target_reg,
1357*4882a593Smuzhiyun "Already found a free region at the start of every zone, but now cannot find any region for zone base 0x%.16llx zone 0x%lx",
1358*4882a593Smuzhiyun (unsigned long long)target_zone_base_addr, target_zone_bits)) {
1359*4882a593Smuzhiyun err = -ENOMEM;
1360*4882a593Smuzhiyun goto exit_unlock;
1361*4882a593Smuzhiyun }
1362*4882a593Smuzhiyun /* kbase_region_tracker_has_allocs() above has already ensured that all
1363*4882a593Smuzhiyun * of the zones have no allocs, so no need to check that again on
1364*4882a593Smuzhiyun * target_reg
1365*4882a593Smuzhiyun */
1366*4882a593Smuzhiyun WARN_ON((!(target_reg->flags & KBASE_REG_FREE)) ||
1367*4882a593Smuzhiyun target_reg->nr_pages != target_zone->va_size_pages);
1368*4882a593Smuzhiyun
1369*4882a593Smuzhiyun if (target_reg->nr_pages <= exec_va_pages ||
1370*4882a593Smuzhiyun target_zone->va_size_pages <= exec_va_pages) {
1371*4882a593Smuzhiyun err = -ENOMEM;
1372*4882a593Smuzhiyun goto exit_unlock;
1373*4882a593Smuzhiyun }
1374*4882a593Smuzhiyun
1375*4882a593Smuzhiyun /* Taken from the end of the target zone */
1376*4882a593Smuzhiyun exec_va_start = kbase_reg_zone_end_pfn(target_zone) - exec_va_pages;
1377*4882a593Smuzhiyun
1378*4882a593Smuzhiyun exec_va_reg = kbase_alloc_free_region(kctx->kbdev, &kctx->reg_rbtree_exec, exec_va_start,
1379*4882a593Smuzhiyun exec_va_pages, KBASE_REG_ZONE_EXEC_VA);
1380*4882a593Smuzhiyun if (!exec_va_reg) {
1381*4882a593Smuzhiyun err = -ENOMEM;
1382*4882a593Smuzhiyun goto exit_unlock;
1383*4882a593Smuzhiyun }
1384*4882a593Smuzhiyun /* Update EXEC_VA zone
1385*4882a593Smuzhiyun *
1386*4882a593Smuzhiyun * not using kbase_ctx_reg_zone_init() - it was already initialized
1387*4882a593Smuzhiyun */
1388*4882a593Smuzhiyun exec_va_zone = kbase_ctx_reg_zone_get(kctx, KBASE_REG_ZONE_EXEC_VA);
1389*4882a593Smuzhiyun exec_va_zone->base_pfn = exec_va_start;
1390*4882a593Smuzhiyun exec_va_zone->va_size_pages = exec_va_pages;
1391*4882a593Smuzhiyun
1392*4882a593Smuzhiyun /* Update target zone and corresponding region */
1393*4882a593Smuzhiyun target_reg->nr_pages -= exec_va_pages;
1394*4882a593Smuzhiyun target_zone->va_size_pages -= exec_va_pages;
1395*4882a593Smuzhiyun
1396*4882a593Smuzhiyun kbase_region_tracker_insert(exec_va_reg);
1397*4882a593Smuzhiyun err = 0;
1398*4882a593Smuzhiyun
1399*4882a593Smuzhiyun exit_unlock:
1400*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
1401*4882a593Smuzhiyun return err;
1402*4882a593Smuzhiyun #endif /* MALI_USE_CSF */
1403*4882a593Smuzhiyun }
1404*4882a593Smuzhiyun
1405*4882a593Smuzhiyun #if MALI_USE_CSF
1406*4882a593Smuzhiyun void kbase_mcu_shared_interface_region_tracker_term(struct kbase_device *kbdev)
1407*4882a593Smuzhiyun {
1408*4882a593Smuzhiyun kbase_region_tracker_term_rbtree(&kbdev->csf.shared_reg_rbtree);
1409*4882a593Smuzhiyun }
1410*4882a593Smuzhiyun
1411*4882a593Smuzhiyun int kbase_mcu_shared_interface_region_tracker_init(struct kbase_device *kbdev)
1412*4882a593Smuzhiyun {
1413*4882a593Smuzhiyun struct kbase_va_region *shared_reg;
1414*4882a593Smuzhiyun u64 shared_reg_start_pfn;
1415*4882a593Smuzhiyun u64 shared_reg_size;
1416*4882a593Smuzhiyun
1417*4882a593Smuzhiyun shared_reg_start_pfn = KBASE_REG_ZONE_MCU_SHARED_BASE;
1418*4882a593Smuzhiyun shared_reg_size = KBASE_REG_ZONE_MCU_SHARED_SIZE;
1419*4882a593Smuzhiyun
1420*4882a593Smuzhiyun kbdev->csf.shared_reg_rbtree = RB_ROOT;
1421*4882a593Smuzhiyun
1422*4882a593Smuzhiyun shared_reg =
1423*4882a593Smuzhiyun kbase_alloc_free_region(kbdev, &kbdev->csf.shared_reg_rbtree, shared_reg_start_pfn,
1424*4882a593Smuzhiyun shared_reg_size, KBASE_REG_ZONE_MCU_SHARED);
1425*4882a593Smuzhiyun if (!shared_reg)
1426*4882a593Smuzhiyun return -ENOMEM;
1427*4882a593Smuzhiyun
1428*4882a593Smuzhiyun kbase_region_tracker_insert(shared_reg);
1429*4882a593Smuzhiyun return 0;
1430*4882a593Smuzhiyun }
1431*4882a593Smuzhiyun #endif
1432*4882a593Smuzhiyun
1433*4882a593Smuzhiyun static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
1434*4882a593Smuzhiyun {
1435*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE)
1436*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC)
1437*4882a593Smuzhiyun kbdev->pagesize_2mb = true;
1438*4882a593Smuzhiyun if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC) != 1) {
1439*4882a593Smuzhiyun dev_warn(
1440*4882a593Smuzhiyun kbdev->dev,
1441*4882a593Smuzhiyun "2MB pages are force-enabled, but the current GPU does not meet the requirements to do so.\n");
1442*4882a593Smuzhiyun }
1443*4882a593Smuzhiyun #else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
1444*4882a593Smuzhiyun kbdev->pagesize_2mb = false;
1445*4882a593Smuzhiyun #endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC) */
1446*4882a593Smuzhiyun #else /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
1447*4882a593Smuzhiyun /* Set it to the default based on which GPU is present */
1448*4882a593Smuzhiyun kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
1449*4882a593Smuzhiyun #endif /* IS_ENABLED(CONFIG_LARGE_PAGE_ALLOC_OVERRIDE) */
1450*4882a593Smuzhiyun }
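/*
 * Summary of the resulting kbdev->pagesize_2mb value, derived from the
 * preprocessor logic above:
 *
 *   LARGE_PAGE_ALLOC_OVERRIDE=y, LARGE_PAGE_ALLOC=y  -> true (forced on,
 *       with a warning if the GPU lacks BASE_HW_FEATURE_LARGE_PAGE_ALLOC)
 *   LARGE_PAGE_ALLOC_OVERRIDE=y, LARGE_PAGE_ALLOC=n  -> false
 *   LARGE_PAGE_ALLOC_OVERRIDE=n                      -> follows
 *       kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC)
 */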
1451*4882a593Smuzhiyun
1452*4882a593Smuzhiyun int kbase_mem_init(struct kbase_device *kbdev)
1453*4882a593Smuzhiyun {
1454*4882a593Smuzhiyun int err = 0;
1455*4882a593Smuzhiyun struct kbasep_mem_device *memdev;
1456*4882a593Smuzhiyun char va_region_slab_name[VA_REGION_SLAB_NAME_SIZE];
1457*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_OF)
1458*4882a593Smuzhiyun struct device_node *mgm_node = NULL;
1459*4882a593Smuzhiyun #endif
1460*4882a593Smuzhiyun
1461*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kbdev);
1462*4882a593Smuzhiyun
1463*4882a593Smuzhiyun memdev = &kbdev->memdev;
1464*4882a593Smuzhiyun
1465*4882a593Smuzhiyun kbasep_mem_page_size_init(kbdev);
1466*4882a593Smuzhiyun
1467*4882a593Smuzhiyun scnprintf(va_region_slab_name, VA_REGION_SLAB_NAME_SIZE, VA_REGION_SLAB_NAME_PREFIX "%s",
1468*4882a593Smuzhiyun kbdev->devname);
1469*4882a593Smuzhiyun
1470*4882a593Smuzhiyun /* Initialize slab cache for kbase_va_regions */
1471*4882a593Smuzhiyun kbdev->va_region_slab =
1472*4882a593Smuzhiyun kmem_cache_create(va_region_slab_name, sizeof(struct kbase_va_region), 0, 0, NULL);
1473*4882a593Smuzhiyun if (kbdev->va_region_slab == NULL) {
1474*4882a593Smuzhiyun dev_err(kbdev->dev, "Failed to create va_region_slab\n");
1475*4882a593Smuzhiyun return -ENOMEM;
1476*4882a593Smuzhiyun }
1477*4882a593Smuzhiyun
1478*4882a593Smuzhiyun kbase_mem_migrate_init(kbdev);
1479*4882a593Smuzhiyun kbase_mem_pool_group_config_set_max_size(&kbdev->mem_pool_defaults,
1480*4882a593Smuzhiyun KBASE_MEM_POOL_MAX_SIZE_KCTX);
1481*4882a593Smuzhiyun
1482*4882a593Smuzhiyun /* Initialize memory usage */
1483*4882a593Smuzhiyun atomic_set(&memdev->used_pages, 0);
1484*4882a593Smuzhiyun
1485*4882a593Smuzhiyun spin_lock_init(&kbdev->gpu_mem_usage_lock);
1486*4882a593Smuzhiyun kbdev->total_gpu_pages = 0;
1487*4882a593Smuzhiyun kbdev->process_root = RB_ROOT;
1488*4882a593Smuzhiyun kbdev->dma_buf_root = RB_ROOT;
1489*4882a593Smuzhiyun mutex_init(&kbdev->dma_buf_lock);
1490*4882a593Smuzhiyun
1491*4882a593Smuzhiyun #ifdef IR_THRESHOLD
1492*4882a593Smuzhiyun atomic_set(&memdev->ir_threshold, IR_THRESHOLD);
1493*4882a593Smuzhiyun #else
1494*4882a593Smuzhiyun atomic_set(&memdev->ir_threshold, DEFAULT_IR_THRESHOLD);
1495*4882a593Smuzhiyun #endif
1496*4882a593Smuzhiyun
1497*4882a593Smuzhiyun kbdev->mgm_dev = &kbase_native_mgm_dev;
1498*4882a593Smuzhiyun
1499*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_OF)
1500*4882a593Smuzhiyun /* Check to see whether or not a platform-specific memory group manager
1501*4882a593Smuzhiyun * is configured and available.
1502*4882a593Smuzhiyun */
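/*
 * Illustrative device tree wiring (node names, labels and the compatible
 * string are examples only; the "physical-memory-group-manager" property
 * is what the lookup below expects):
 *
 *   gpu: gpu@0 {
 *           physical-memory-group-manager = <&mgm>;
 *   };
 *
 *   mgm: memory-group-manager {
 *           compatible = "vendor,memory-group-manager";
 *   };
 */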
1503*4882a593Smuzhiyun mgm_node = of_parse_phandle(kbdev->dev->of_node,
1504*4882a593Smuzhiyun "physical-memory-group-manager", 0);
1505*4882a593Smuzhiyun if (!mgm_node) {
1506*4882a593Smuzhiyun dev_info(kbdev->dev,
1507*4882a593Smuzhiyun "No memory group manager is configured\n");
1508*4882a593Smuzhiyun } else {
1509*4882a593Smuzhiyun struct platform_device *const pdev =
1510*4882a593Smuzhiyun of_find_device_by_node(mgm_node);
1511*4882a593Smuzhiyun
1512*4882a593Smuzhiyun if (!pdev) {
1513*4882a593Smuzhiyun dev_err(kbdev->dev,
1514*4882a593Smuzhiyun "The configured memory group manager was not found\n");
1515*4882a593Smuzhiyun } else {
1516*4882a593Smuzhiyun kbdev->mgm_dev = platform_get_drvdata(pdev);
1517*4882a593Smuzhiyun if (!kbdev->mgm_dev) {
1518*4882a593Smuzhiyun dev_info(kbdev->dev,
1519*4882a593Smuzhiyun "Memory group manager is not ready\n");
1520*4882a593Smuzhiyun err = -EPROBE_DEFER;
1521*4882a593Smuzhiyun } else if (!try_module_get(kbdev->mgm_dev->owner)) {
1522*4882a593Smuzhiyun dev_err(kbdev->dev,
1523*4882a593Smuzhiyun "Failed to get memory group manager module\n");
1524*4882a593Smuzhiyun err = -ENODEV;
1525*4882a593Smuzhiyun kbdev->mgm_dev = NULL;
1526*4882a593Smuzhiyun } else {
1527*4882a593Smuzhiyun dev_info(kbdev->dev,
1528*4882a593Smuzhiyun "Memory group manager successfully loaded\n");
1529*4882a593Smuzhiyun }
1530*4882a593Smuzhiyun }
1531*4882a593Smuzhiyun of_node_put(mgm_node);
1532*4882a593Smuzhiyun }
1533*4882a593Smuzhiyun #endif
1534*4882a593Smuzhiyun
1535*4882a593Smuzhiyun if (likely(!err)) {
1536*4882a593Smuzhiyun struct kbase_mem_pool_group_config mem_pool_defaults;
1537*4882a593Smuzhiyun
1538*4882a593Smuzhiyun kbase_mem_pool_group_config_set_max_size(&mem_pool_defaults,
1539*4882a593Smuzhiyun KBASE_MEM_POOL_MAX_SIZE_KBDEV);
1540*4882a593Smuzhiyun
1541*4882a593Smuzhiyun err = kbase_mem_pool_group_init(&kbdev->mem_pools, kbdev, &mem_pool_defaults, NULL);
1542*4882a593Smuzhiyun }
1543*4882a593Smuzhiyun
1544*4882a593Smuzhiyun return err;
1545*4882a593Smuzhiyun }
1546*4882a593Smuzhiyun
1547*4882a593Smuzhiyun void kbase_mem_halt(struct kbase_device *kbdev)
1548*4882a593Smuzhiyun {
1549*4882a593Smuzhiyun CSTD_UNUSED(kbdev);
1550*4882a593Smuzhiyun }
1551*4882a593Smuzhiyun
1552*4882a593Smuzhiyun void kbase_mem_term(struct kbase_device *kbdev)
1553*4882a593Smuzhiyun {
1554*4882a593Smuzhiyun struct kbasep_mem_device *memdev;
1555*4882a593Smuzhiyun int pages;
1556*4882a593Smuzhiyun
1557*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kbdev);
1558*4882a593Smuzhiyun
1559*4882a593Smuzhiyun memdev = &kbdev->memdev;
1560*4882a593Smuzhiyun
1561*4882a593Smuzhiyun pages = atomic_read(&memdev->used_pages);
1562*4882a593Smuzhiyun if (pages != 0)
1563*4882a593Smuzhiyun dev_warn(kbdev->dev, "%s: %d pages in use!\n", __func__, pages);
1564*4882a593Smuzhiyun
1565*4882a593Smuzhiyun kbase_mem_pool_group_term(&kbdev->mem_pools);
1566*4882a593Smuzhiyun
1567*4882a593Smuzhiyun kbase_mem_migrate_term(kbdev);
1568*4882a593Smuzhiyun
1569*4882a593Smuzhiyun kmem_cache_destroy(kbdev->va_region_slab);
1570*4882a593Smuzhiyun kbdev->va_region_slab = NULL;
1571*4882a593Smuzhiyun
1572*4882a593Smuzhiyun WARN_ON(kbdev->total_gpu_pages);
1573*4882a593Smuzhiyun WARN_ON(!RB_EMPTY_ROOT(&kbdev->process_root));
1574*4882a593Smuzhiyun WARN_ON(!RB_EMPTY_ROOT(&kbdev->dma_buf_root));
1575*4882a593Smuzhiyun mutex_destroy(&kbdev->dma_buf_lock);
1576*4882a593Smuzhiyun
1577*4882a593Smuzhiyun if (kbdev->mgm_dev)
1578*4882a593Smuzhiyun module_put(kbdev->mgm_dev->owner);
1579*4882a593Smuzhiyun }
1580*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_mem_term);
1581*4882a593Smuzhiyun
1582*4882a593Smuzhiyun /**
1583*4882a593Smuzhiyun * kbase_alloc_free_region - Allocate a free region object.
1584*4882a593Smuzhiyun *
1585*4882a593Smuzhiyun * @kbdev: kbase device
1586*4882a593Smuzhiyun * @rbtree: Backlink to the red-black tree of memory regions.
1587*4882a593Smuzhiyun * @start_pfn: The Page Frame Number in GPU virtual address space.
1588*4882a593Smuzhiyun * @nr_pages: The size of the region in pages.
1589*4882a593Smuzhiyun * @zone: KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA
1590*4882a593Smuzhiyun *
1591*4882a593Smuzhiyun * The allocated object is not part of any list yet, and is flagged as
1592*4882a593Smuzhiyun * KBASE_REG_FREE. No mapping is allocated yet.
1593*4882a593Smuzhiyun *
1594*4882a593Smuzhiyun * zone is KBASE_REG_ZONE_CUSTOM_VA or KBASE_REG_ZONE_SAME_VA.
1595*4882a593Smuzhiyun *
1596*4882a593Smuzhiyun * Return: pointer to the allocated region object on success, NULL otherwise.
1597*4882a593Smuzhiyun */
1598*4882a593Smuzhiyun struct kbase_va_region *kbase_alloc_free_region(struct kbase_device *kbdev, struct rb_root *rbtree,
1599*4882a593Smuzhiyun u64 start_pfn, size_t nr_pages, int zone)
1600*4882a593Smuzhiyun {
1601*4882a593Smuzhiyun struct kbase_va_region *new_reg;
1602*4882a593Smuzhiyun
1603*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(rbtree != NULL);
1604*4882a593Smuzhiyun
1605*4882a593Smuzhiyun /* zone argument should only contain zone related region flags */
1606*4882a593Smuzhiyun KBASE_DEBUG_ASSERT((zone & ~KBASE_REG_ZONE_MASK) == 0);
1607*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(nr_pages > 0);
1608*4882a593Smuzhiyun /* 64-bit address range is the max */
1609*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(start_pfn + nr_pages <= (U64_MAX / PAGE_SIZE));
1610*4882a593Smuzhiyun
1611*4882a593Smuzhiyun new_reg = kmem_cache_zalloc(kbdev->va_region_slab, GFP_KERNEL);
1612*4882a593Smuzhiyun
1613*4882a593Smuzhiyun if (!new_reg)
1614*4882a593Smuzhiyun return NULL;
1615*4882a593Smuzhiyun
1616*4882a593Smuzhiyun kbase_refcount_set(&new_reg->va_refcnt, 1);
1617*4882a593Smuzhiyun atomic_set(&new_reg->no_user_free_count, 0);
1618*4882a593Smuzhiyun new_reg->cpu_alloc = NULL; /* no alloc bound yet */
1619*4882a593Smuzhiyun new_reg->gpu_alloc = NULL; /* no alloc bound yet */
1620*4882a593Smuzhiyun new_reg->rbtree = rbtree;
1621*4882a593Smuzhiyun new_reg->flags = zone | KBASE_REG_FREE;
1622*4882a593Smuzhiyun
1623*4882a593Smuzhiyun new_reg->flags |= KBASE_REG_GROWABLE;
1624*4882a593Smuzhiyun
1625*4882a593Smuzhiyun new_reg->start_pfn = start_pfn;
1626*4882a593Smuzhiyun new_reg->nr_pages = nr_pages;
1627*4882a593Smuzhiyun
1628*4882a593Smuzhiyun INIT_LIST_HEAD(&new_reg->jit_node);
1629*4882a593Smuzhiyun INIT_LIST_HEAD(&new_reg->link);
1630*4882a593Smuzhiyun
1631*4882a593Smuzhiyun return new_reg;
1632*4882a593Smuzhiyun }
1633*4882a593Smuzhiyun
1634*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_alloc_free_region);
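/*
 * Typical usage (a minimal sketch mirroring the zone setup code earlier in
 * this file): allocate a free region covering a whole zone, then link it
 * into the zone's rbtree; a region that is never inserted must instead be
 * released with kbase_free_alloced_region().
 *
 *   reg = kbase_alloc_free_region(kbdev, &kctx->reg_rbtree_custom,
 *                                 zone_base_pfn, zone_size_pages,
 *                                 KBASE_REG_ZONE_CUSTOM_VA);
 *   if (!reg)
 *           return -ENOMEM;
 *   kbase_region_tracker_insert(reg);
 */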
1635*4882a593Smuzhiyun
1636*4882a593Smuzhiyun /**
1637*4882a593Smuzhiyun * kbase_free_alloced_region - Free a region object.
1638*4882a593Smuzhiyun *
1639*4882a593Smuzhiyun * @reg: Region
1640*4882a593Smuzhiyun *
1641*4882a593Smuzhiyun * The described region must be freed of any mapping.
1642*4882a593Smuzhiyun *
1643*4882a593Smuzhiyun * If the region is not flagged as KBASE_REG_FREE, the region's
1644*4882a593Smuzhiyun * alloc object will be released.
1645*4882a593Smuzhiyun * It is a bug if no alloc object exists for non-free regions.
1646*4882a593Smuzhiyun *
1647*4882a593Smuzhiyun * If the region is in the KBASE_REG_ZONE_MCU_SHARED zone, it is simply freed.
1648*4882a593Smuzhiyun */
1649*4882a593Smuzhiyun void kbase_free_alloced_region(struct kbase_va_region *reg)
1650*4882a593Smuzhiyun {
1651*4882a593Smuzhiyun #if MALI_USE_CSF
1652*4882a593Smuzhiyun if ((reg->flags & KBASE_REG_ZONE_MASK) ==
1653*4882a593Smuzhiyun KBASE_REG_ZONE_MCU_SHARED) {
1654*4882a593Smuzhiyun kfree(reg);
1655*4882a593Smuzhiyun return;
1656*4882a593Smuzhiyun }
1657*4882a593Smuzhiyun #endif
1658*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_FREE)) {
1659*4882a593Smuzhiyun struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
1660*4882a593Smuzhiyun
1661*4882a593Smuzhiyun if (WARN_ON(!kctx))
1662*4882a593Smuzhiyun return;
1663*4882a593Smuzhiyun
1664*4882a593Smuzhiyun if (WARN_ON(kbase_is_region_invalid(reg)))
1665*4882a593Smuzhiyun return;
1666*4882a593Smuzhiyun
1667*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev, "Freeing memory region %pK\n",
1668*4882a593Smuzhiyun (void *)reg);
1669*4882a593Smuzhiyun #if MALI_USE_CSF
1670*4882a593Smuzhiyun if (reg->flags & KBASE_REG_CSF_EVENT)
1671*4882a593Smuzhiyun /*
1672*4882a593Smuzhiyun * This should not be reachable if called from 'mcu_shared' functions
1673*4882a593Smuzhiyun * such as:
1674*4882a593Smuzhiyun * kbase_csf_firmware_mcu_shared_mapping_init
1675*4882a593Smuzhiyun * kbase_csf_firmware_mcu_shared_mapping_term
1676*4882a593Smuzhiyun */
1677*4882a593Smuzhiyun
1678*4882a593Smuzhiyun kbase_unlink_event_mem_page(kctx, reg);
1679*4882a593Smuzhiyun #endif
1680*4882a593Smuzhiyun
1681*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
1682*4882a593Smuzhiyun
1683*4882a593Smuzhiyun /*
1684*4882a593Smuzhiyun * The physical allocation should have been removed from the
1685*4882a593Smuzhiyun * eviction list before this function is called. However, in the
1686*4882a593Smuzhiyun * case of abnormal process termination, or of the app leaking the
1687*4882a593Smuzhiyun * memory, kbase_mem_free_region is not called, so it can still be
1688*4882a593Smuzhiyun * on the list when the region tracker is terminated.
1689*4882a593Smuzhiyun */
1690*4882a593Smuzhiyun if (!list_empty(&reg->gpu_alloc->evict_node)) {
1691*4882a593Smuzhiyun /*
1692*4882a593Smuzhiyun * Unlink the physical allocation before unmaking it
1693*4882a593Smuzhiyun * evictable so that the allocation isn't grown back to
1694*4882a593Smuzhiyun * its last backed size as we're going to unmap it
1695*4882a593Smuzhiyun * anyway.
1696*4882a593Smuzhiyun */
1697*4882a593Smuzhiyun reg->cpu_alloc->reg = NULL;
1698*4882a593Smuzhiyun if (reg->cpu_alloc != reg->gpu_alloc)
1699*4882a593Smuzhiyun reg->gpu_alloc->reg = NULL;
1700*4882a593Smuzhiyun
1701*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
1702*4882a593Smuzhiyun
1703*4882a593Smuzhiyun /*
1704*4882a593Smuzhiyun * If a region has been made evictable then we must
1705*4882a593Smuzhiyun * unmake it before trying to free it.
1706*4882a593Smuzhiyun * If the memory hasn't been reclaimed it will be
1707*4882a593Smuzhiyun * unmapped and freed below, if it has been reclaimed
1708*4882a593Smuzhiyun * then the operations below are no-ops.
1709*4882a593Smuzhiyun */
1710*4882a593Smuzhiyun if (reg->flags & KBASE_REG_DONT_NEED) {
1711*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(reg->cpu_alloc->type ==
1712*4882a593Smuzhiyun KBASE_MEM_TYPE_NATIVE);
1713*4882a593Smuzhiyun kbase_mem_evictable_unmake(reg->gpu_alloc);
1714*4882a593Smuzhiyun }
1715*4882a593Smuzhiyun } else {
1716*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
1717*4882a593Smuzhiyun }
1718*4882a593Smuzhiyun
1719*4882a593Smuzhiyun /*
1720*4882a593Smuzhiyun * Remove the region from the sticky resource metadata
1721*4882a593Smuzhiyun * list should it be there.
1722*4882a593Smuzhiyun */
1723*4882a593Smuzhiyun kbase_sticky_resource_release_force(kctx, NULL,
1724*4882a593Smuzhiyun reg->start_pfn << PAGE_SHIFT);
1725*4882a593Smuzhiyun
1726*4882a593Smuzhiyun kbase_mem_phy_alloc_put(reg->cpu_alloc);
1727*4882a593Smuzhiyun kbase_mem_phy_alloc_put(reg->gpu_alloc);
1728*4882a593Smuzhiyun
1729*4882a593Smuzhiyun reg->flags |= KBASE_REG_VA_FREED;
1730*4882a593Smuzhiyun kbase_va_region_alloc_put(kctx, reg);
1731*4882a593Smuzhiyun } else {
1732*4882a593Smuzhiyun kfree(reg);
1733*4882a593Smuzhiyun }
1734*4882a593Smuzhiyun }
1735*4882a593Smuzhiyun
1736*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_free_alloced_region);
1737*4882a593Smuzhiyun
1738*4882a593Smuzhiyun int kbase_gpu_mmap(struct kbase_context *kctx, struct kbase_va_region *reg,
1739*4882a593Smuzhiyun u64 addr, size_t nr_pages, size_t align,
1740*4882a593Smuzhiyun enum kbase_caller_mmu_sync_info mmu_sync_info)
1741*4882a593Smuzhiyun {
1742*4882a593Smuzhiyun int err;
1743*4882a593Smuzhiyun size_t i = 0;
1744*4882a593Smuzhiyun unsigned long attr;
1745*4882a593Smuzhiyun unsigned long mask = ~KBASE_REG_MEMATTR_MASK;
1746*4882a593Smuzhiyun unsigned long gwt_mask = ~0;
1747*4882a593Smuzhiyun int group_id;
1748*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc;
1749*4882a593Smuzhiyun
1750*4882a593Smuzhiyun #ifdef CONFIG_MALI_CINSTR_GWT
1751*4882a593Smuzhiyun if (kctx->gwt_enabled)
1752*4882a593Smuzhiyun gwt_mask = ~KBASE_REG_GPU_WR;
1753*4882a593Smuzhiyun #endif
1754*4882a593Smuzhiyun
1755*4882a593Smuzhiyun if ((kctx->kbdev->system_coherency == COHERENCY_ACE) &&
1756*4882a593Smuzhiyun (reg->flags & KBASE_REG_SHARE_BOTH))
1757*4882a593Smuzhiyun attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_OUTER_WA);
1758*4882a593Smuzhiyun else
1759*4882a593Smuzhiyun attr = KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_WRITE_ALLOC);
1760*4882a593Smuzhiyun
1761*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
1762*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(reg != NULL);
1763*4882a593Smuzhiyun
1764*4882a593Smuzhiyun err = kbase_add_va_region(kctx, reg, addr, nr_pages, align);
1765*4882a593Smuzhiyun if (err)
1766*4882a593Smuzhiyun return err;
1767*4882a593Smuzhiyun
1768*4882a593Smuzhiyun alloc = reg->gpu_alloc;
1769*4882a593Smuzhiyun group_id = alloc->group_id;
1770*4882a593Smuzhiyun
1771*4882a593Smuzhiyun if (reg->gpu_alloc->type == KBASE_MEM_TYPE_ALIAS) {
1772*4882a593Smuzhiyun u64 const stride = alloc->imported.alias.stride;
1773*4882a593Smuzhiyun
1774*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->imported.alias.aliased);
1775*4882a593Smuzhiyun for (i = 0; i < alloc->imported.alias.nents; i++) {
1776*4882a593Smuzhiyun if (alloc->imported.alias.aliased[i].alloc) {
1777*4882a593Smuzhiyun err = kbase_mmu_insert_aliased_pages(
1778*4882a593Smuzhiyun kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
1779*4882a593Smuzhiyun alloc->imported.alias.aliased[i].alloc->pages +
1780*4882a593Smuzhiyun alloc->imported.alias.aliased[i].offset,
1781*4882a593Smuzhiyun alloc->imported.alias.aliased[i].length,
1782*4882a593Smuzhiyun reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info,
1783*4882a593Smuzhiyun NULL);
1784*4882a593Smuzhiyun if (err)
1785*4882a593Smuzhiyun goto bad_aliased_insert;
1786*4882a593Smuzhiyun
1787*4882a593Smuzhiyun /* Note: mapping count is tracked at alias
1788*4882a593Smuzhiyun * creation time
1789*4882a593Smuzhiyun */
1790*4882a593Smuzhiyun } else {
1791*4882a593Smuzhiyun err = kbase_mmu_insert_single_aliased_page(
1792*4882a593Smuzhiyun kctx, reg->start_pfn + i * stride, kctx->aliasing_sink_page,
1793*4882a593Smuzhiyun alloc->imported.alias.aliased[i].length,
1794*4882a593Smuzhiyun (reg->flags & mask & gwt_mask) | attr, group_id,
1795*4882a593Smuzhiyun mmu_sync_info);
1796*4882a593Smuzhiyun
1797*4882a593Smuzhiyun if (err)
1798*4882a593Smuzhiyun goto bad_aliased_insert;
1799*4882a593Smuzhiyun }
1800*4882a593Smuzhiyun }
1801*4882a593Smuzhiyun } else {
1802*4882a593Smuzhiyun if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM ||
1803*4882a593Smuzhiyun reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF) {
1804*4882a593Smuzhiyun
1805*4882a593Smuzhiyun err = kbase_mmu_insert_imported_pages(
1806*4882a593Smuzhiyun kctx->kbdev, &kctx->mmu, reg->start_pfn,
1807*4882a593Smuzhiyun kbase_get_gpu_phy_pages(reg), kbase_reg_current_backed_size(reg),
1808*4882a593Smuzhiyun reg->flags & gwt_mask, kctx->as_nr, group_id, mmu_sync_info, reg);
1809*4882a593Smuzhiyun } else {
1810*4882a593Smuzhiyun err = kbase_mmu_insert_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1811*4882a593Smuzhiyun kbase_get_gpu_phy_pages(reg),
1812*4882a593Smuzhiyun kbase_reg_current_backed_size(reg),
1813*4882a593Smuzhiyun reg->flags & gwt_mask, kctx->as_nr, group_id,
1814*4882a593Smuzhiyun mmu_sync_info, reg, true);
1815*4882a593Smuzhiyun }
1816*4882a593Smuzhiyun
1817*4882a593Smuzhiyun if (err)
1818*4882a593Smuzhiyun goto bad_insert;
1819*4882a593Smuzhiyun kbase_mem_phy_alloc_gpu_mapped(alloc);
1820*4882a593Smuzhiyun }
1821*4882a593Smuzhiyun
1822*4882a593Smuzhiyun if (reg->flags & KBASE_REG_IMPORT_PAD &&
1823*4882a593Smuzhiyun !WARN_ON(reg->nr_pages < reg->gpu_alloc->nents) &&
1824*4882a593Smuzhiyun reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM &&
1825*4882a593Smuzhiyun reg->gpu_alloc->imported.umm.current_mapping_usage_count) {
1826*4882a593Smuzhiyun /* For padded imported dma-buf or user-buf memory, map the dummy
1827*4882a593Smuzhiyun * aliasing page from the end of the imported pages, to the end of
1828*4882a593Smuzhiyun * the region, using a read-only mapping.
1829*4882a593Smuzhiyun *
1830*4882a593Smuzhiyun * Only map when it's imported dma-buf memory that is currently
1831*4882a593Smuzhiyun * mapped.
1832*4882a593Smuzhiyun *
1833*4882a593Smuzhiyun * Assume reg->gpu_alloc->nents is the number of actual pages
1834*4882a593Smuzhiyun * in the dma-buf memory.
1835*4882a593Smuzhiyun */
1836*4882a593Smuzhiyun err = kbase_mmu_insert_single_imported_page(
1837*4882a593Smuzhiyun kctx, reg->start_pfn + reg->gpu_alloc->nents, kctx->aliasing_sink_page,
1838*4882a593Smuzhiyun reg->nr_pages - reg->gpu_alloc->nents,
1839*4882a593Smuzhiyun (reg->flags | KBASE_REG_GPU_RD) & ~KBASE_REG_GPU_WR, KBASE_MEM_GROUP_SINK,
1840*4882a593Smuzhiyun mmu_sync_info);
1841*4882a593Smuzhiyun if (err)
1842*4882a593Smuzhiyun goto bad_insert;
1843*4882a593Smuzhiyun }
1844*4882a593Smuzhiyun
1845*4882a593Smuzhiyun return err;
1846*4882a593Smuzhiyun
1847*4882a593Smuzhiyun bad_aliased_insert:
1848*4882a593Smuzhiyun while (i-- > 0) {
1849*4882a593Smuzhiyun struct tagged_addr *phys_alloc = NULL;
1850*4882a593Smuzhiyun u64 const stride = alloc->imported.alias.stride;
1851*4882a593Smuzhiyun
1852*4882a593Smuzhiyun if (alloc->imported.alias.aliased[i].alloc != NULL)
1853*4882a593Smuzhiyun phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
1854*4882a593Smuzhiyun alloc->imported.alias.aliased[i].offset;
1855*4882a593Smuzhiyun
1856*4882a593Smuzhiyun kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn + (i * stride),
1857*4882a593Smuzhiyun phys_alloc, alloc->imported.alias.aliased[i].length,
1858*4882a593Smuzhiyun alloc->imported.alias.aliased[i].length, kctx->as_nr,
1859*4882a593Smuzhiyun false);
1860*4882a593Smuzhiyun }
1861*4882a593Smuzhiyun bad_insert:
1862*4882a593Smuzhiyun kbase_remove_va_region(kctx->kbdev, reg);
1863*4882a593Smuzhiyun
1864*4882a593Smuzhiyun return err;
1865*4882a593Smuzhiyun }
1866*4882a593Smuzhiyun
1867*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_gpu_mmap);
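/*
 * Layout produced by the alias path above (illustrative): with stride S,
 * aliased sub-allocation i is mapped at GPU PFN reg->start_pfn + i * S for
 * aliased[i].length pages; slots with no backing allocation are mapped to
 * the aliasing sink page over the same extent instead.
 */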
1868*4882a593Smuzhiyun
1869*4882a593Smuzhiyun static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
1870*4882a593Smuzhiyun struct kbase_va_region *reg, bool writeable);
1871*4882a593Smuzhiyun
1872*4882a593Smuzhiyun int kbase_gpu_munmap(struct kbase_context *kctx, struct kbase_va_region *reg)
1873*4882a593Smuzhiyun {
1874*4882a593Smuzhiyun int err = 0;
1875*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc;
1876*4882a593Smuzhiyun
1877*4882a593Smuzhiyun if (reg->start_pfn == 0)
1878*4882a593Smuzhiyun return 0;
1879*4882a593Smuzhiyun
1880*4882a593Smuzhiyun if (!reg->gpu_alloc)
1881*4882a593Smuzhiyun return -EINVAL;
1882*4882a593Smuzhiyun
1883*4882a593Smuzhiyun alloc = reg->gpu_alloc;
1884*4882a593Smuzhiyun
1885*4882a593Smuzhiyun /* Tear down GPU page tables, depending on memory type. */
1886*4882a593Smuzhiyun switch (alloc->type) {
1887*4882a593Smuzhiyun case KBASE_MEM_TYPE_ALIAS: {
1888*4882a593Smuzhiyun size_t i = 0;
1889*4882a593Smuzhiyun /* Due to the way the number of valid PTEs and ATEs is currently
1890*4882a593Smuzhiyun * tracked, only the GPU virtual range that is backed & mapped
1891*4882a593Smuzhiyun * should be passed to the kbase_mmu_teardown_pages() function,
1892*4882a593Smuzhiyun * hence individual aliased regions need to be unmapped
1893*4882a593Smuzhiyun * separately.
1894*4882a593Smuzhiyun */
1895*4882a593Smuzhiyun for (i = 0; i < alloc->imported.alias.nents; i++) {
1896*4882a593Smuzhiyun struct tagged_addr *phys_alloc = NULL;
1897*4882a593Smuzhiyun int err_loop;
1898*4882a593Smuzhiyun
1899*4882a593Smuzhiyun if (alloc->imported.alias.aliased[i].alloc != NULL)
1900*4882a593Smuzhiyun phys_alloc = alloc->imported.alias.aliased[i].alloc->pages +
1901*4882a593Smuzhiyun alloc->imported.alias.aliased[i].offset;
1902*4882a593Smuzhiyun
1903*4882a593Smuzhiyun err_loop = kbase_mmu_teardown_pages(
1904*4882a593Smuzhiyun kctx->kbdev, &kctx->mmu,
1905*4882a593Smuzhiyun reg->start_pfn + (i * alloc->imported.alias.stride),
1906*4882a593Smuzhiyun phys_alloc, alloc->imported.alias.aliased[i].length,
1907*4882a593Smuzhiyun alloc->imported.alias.aliased[i].length, kctx->as_nr,
1908*4882a593Smuzhiyun false);
1909*4882a593Smuzhiyun
1910*4882a593Smuzhiyun if (WARN_ON_ONCE(err_loop))
1911*4882a593Smuzhiyun err = err_loop;
1912*4882a593Smuzhiyun }
1913*4882a593Smuzhiyun }
1914*4882a593Smuzhiyun break;
1915*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_UMM: {
1916*4882a593Smuzhiyun size_t nr_phys_pages = reg->nr_pages;
1917*4882a593Smuzhiyun size_t nr_virt_pages = reg->nr_pages;
1918*4882a593Smuzhiyun /* If the region has import padding and falls under the threshold for
1919*4882a593Smuzhiyun * issuing a partial GPU cache flush, we want to reduce the number of
1920*4882a593Smuzhiyun * physical pages that get flushed.
1921*4882a593Smuzhiyun *
1922*4882a593Smuzhiyun * This is symmetric with the case of mapping the memory, which first maps
1923*4882a593Smuzhiyun * each imported physical page to a separate virtual page, and then
1924*4882a593Smuzhiyun * maps the single aliasing sink page to each of the virtual padding
1925*4882a593Smuzhiyun * pages.
1926*4882a593Smuzhiyun */
1927*4882a593Smuzhiyun if (reg->flags & KBASE_REG_IMPORT_PAD)
1928*4882a593Smuzhiyun nr_phys_pages = alloc->nents + 1;
1929*4882a593Smuzhiyun
1930*4882a593Smuzhiyun err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1931*4882a593Smuzhiyun alloc->pages, nr_phys_pages, nr_virt_pages,
1932*4882a593Smuzhiyun kctx->as_nr, true);
1933*4882a593Smuzhiyun }
1934*4882a593Smuzhiyun break;
1935*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
1936*4882a593Smuzhiyun size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
1937*4882a593Smuzhiyun
1938*4882a593Smuzhiyun err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1939*4882a593Smuzhiyun alloc->pages, nr_reg_pages, nr_reg_pages,
1940*4882a593Smuzhiyun kctx->as_nr, true);
1941*4882a593Smuzhiyun }
1942*4882a593Smuzhiyun break;
1943*4882a593Smuzhiyun default: {
1944*4882a593Smuzhiyun size_t nr_reg_pages = kbase_reg_current_backed_size(reg);
1945*4882a593Smuzhiyun
1946*4882a593Smuzhiyun err = kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
1947*4882a593Smuzhiyun alloc->pages, nr_reg_pages, nr_reg_pages,
1948*4882a593Smuzhiyun kctx->as_nr, false);
1949*4882a593Smuzhiyun }
1950*4882a593Smuzhiyun break;
1951*4882a593Smuzhiyun }
1952*4882a593Smuzhiyun
1953*4882a593Smuzhiyun /* Update tracking, and other cleanup, depending on memory type. */
1954*4882a593Smuzhiyun switch (alloc->type) {
1955*4882a593Smuzhiyun case KBASE_MEM_TYPE_ALIAS:
1956*4882a593Smuzhiyun /* We mark the source allocs as unmapped from the GPU when
1957*4882a593Smuzhiyun * putting reg's allocs
1958*4882a593Smuzhiyun */
1959*4882a593Smuzhiyun break;
1960*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
1961*4882a593Smuzhiyun struct kbase_alloc_import_user_buf *user_buf = &alloc->imported.user_buf;
1962*4882a593Smuzhiyun
1963*4882a593Smuzhiyun if (user_buf->current_mapping_usage_count & PINNED_ON_IMPORT) {
1964*4882a593Smuzhiyun user_buf->current_mapping_usage_count &= ~PINNED_ON_IMPORT;
1965*4882a593Smuzhiyun
1966*4882a593Smuzhiyun /* The allocation could still have active mappings. */
1967*4882a593Smuzhiyun if (user_buf->current_mapping_usage_count == 0) {
1968*4882a593Smuzhiyun kbase_jd_user_buf_unmap(kctx, alloc, reg,
1969*4882a593Smuzhiyun (reg->flags &
1970*4882a593Smuzhiyun (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)));
1971*4882a593Smuzhiyun }
1972*4882a593Smuzhiyun }
1973*4882a593Smuzhiyun }
1974*4882a593Smuzhiyun fallthrough;
1975*4882a593Smuzhiyun default:
1976*4882a593Smuzhiyun kbase_mem_phy_alloc_gpu_unmapped(reg->gpu_alloc);
1977*4882a593Smuzhiyun break;
1978*4882a593Smuzhiyun }
1979*4882a593Smuzhiyun
1980*4882a593Smuzhiyun return err;
1981*4882a593Smuzhiyun }
1982*4882a593Smuzhiyun
1983*4882a593Smuzhiyun static struct kbase_cpu_mapping *kbasep_find_enclosing_cpu_mapping(
1984*4882a593Smuzhiyun struct kbase_context *kctx,
1985*4882a593Smuzhiyun unsigned long uaddr, size_t size, u64 *offset)
1986*4882a593Smuzhiyun {
1987*4882a593Smuzhiyun struct vm_area_struct *vma;
1988*4882a593Smuzhiyun struct kbase_cpu_mapping *map;
1989*4882a593Smuzhiyun unsigned long vm_pgoff_in_region;
1990*4882a593Smuzhiyun unsigned long vm_off_in_region;
1991*4882a593Smuzhiyun unsigned long map_start;
1992*4882a593Smuzhiyun size_t map_size;
1993*4882a593Smuzhiyun
1994*4882a593Smuzhiyun lockdep_assert_held(kbase_mem_get_process_mmap_lock());
1995*4882a593Smuzhiyun
1996*4882a593Smuzhiyun if ((uintptr_t) uaddr + size < (uintptr_t) uaddr) /* overflow check */
1997*4882a593Smuzhiyun return NULL;
1998*4882a593Smuzhiyun
1999*4882a593Smuzhiyun vma = find_vma_intersection(current->mm, uaddr, uaddr+size);
2000*4882a593Smuzhiyun
2001*4882a593Smuzhiyun if (!vma || vma->vm_start > uaddr)
2002*4882a593Smuzhiyun return NULL;
2003*4882a593Smuzhiyun if (vma->vm_ops != &kbase_vm_ops)
2004*4882a593Smuzhiyun /* Not ours! */
2005*4882a593Smuzhiyun return NULL;
2006*4882a593Smuzhiyun
2007*4882a593Smuzhiyun map = vma->vm_private_data;
2008*4882a593Smuzhiyun
2009*4882a593Smuzhiyun if (map->kctx != kctx)
2010*4882a593Smuzhiyun /* Not from this context! */
2011*4882a593Smuzhiyun return NULL;
2012*4882a593Smuzhiyun
2013*4882a593Smuzhiyun vm_pgoff_in_region = vma->vm_pgoff - map->region->start_pfn;
2014*4882a593Smuzhiyun vm_off_in_region = vm_pgoff_in_region << PAGE_SHIFT;
2015*4882a593Smuzhiyun map_start = vma->vm_start - vm_off_in_region;
2016*4882a593Smuzhiyun map_size = map->region->nr_pages << PAGE_SHIFT;
2017*4882a593Smuzhiyun
2018*4882a593Smuzhiyun if ((uaddr + size) > (map_start + map_size))
2019*4882a593Smuzhiyun /* Not within the CPU mapping */
2020*4882a593Smuzhiyun return NULL;
2021*4882a593Smuzhiyun
2022*4882a593Smuzhiyun *offset = (uaddr - vma->vm_start) + vm_off_in_region;
2023*4882a593Smuzhiyun
2024*4882a593Smuzhiyun return map;
2025*4882a593Smuzhiyun }
2026*4882a593Smuzhiyun
2027*4882a593Smuzhiyun int kbasep_find_enclosing_cpu_mapping_offset(
2028*4882a593Smuzhiyun struct kbase_context *kctx,
2029*4882a593Smuzhiyun unsigned long uaddr, size_t size, u64 *offset)
2030*4882a593Smuzhiyun {
2031*4882a593Smuzhiyun struct kbase_cpu_mapping *map;
2032*4882a593Smuzhiyun
2033*4882a593Smuzhiyun kbase_os_mem_map_lock(kctx);
2034*4882a593Smuzhiyun
2035*4882a593Smuzhiyun map = kbasep_find_enclosing_cpu_mapping(kctx, uaddr, size, offset);
2036*4882a593Smuzhiyun
2037*4882a593Smuzhiyun kbase_os_mem_map_unlock(kctx);
2038*4882a593Smuzhiyun
2039*4882a593Smuzhiyun if (!map)
2040*4882a593Smuzhiyun return -EINVAL;
2041*4882a593Smuzhiyun
2042*4882a593Smuzhiyun return 0;
2043*4882a593Smuzhiyun }
2044*4882a593Smuzhiyun
2045*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbasep_find_enclosing_cpu_mapping_offset);
2046*4882a593Smuzhiyun
2047*4882a593Smuzhiyun int kbasep_find_enclosing_gpu_mapping_start_and_offset(struct kbase_context *kctx,
2048*4882a593Smuzhiyun u64 gpu_addr, size_t size, u64 *start, u64 *offset)
2049*4882a593Smuzhiyun {
2050*4882a593Smuzhiyun struct kbase_va_region *region;
2051*4882a593Smuzhiyun
2052*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
2053*4882a593Smuzhiyun
2054*4882a593Smuzhiyun region = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
2055*4882a593Smuzhiyun
2056*4882a593Smuzhiyun if (!region) {
2057*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2058*4882a593Smuzhiyun return -EINVAL;
2059*4882a593Smuzhiyun }
2060*4882a593Smuzhiyun
2061*4882a593Smuzhiyun *start = region->start_pfn << PAGE_SHIFT;
2062*4882a593Smuzhiyun
2063*4882a593Smuzhiyun *offset = gpu_addr - *start;
2064*4882a593Smuzhiyun
2065*4882a593Smuzhiyun if (((region->start_pfn + region->nr_pages) << PAGE_SHIFT) < (gpu_addr + size)) {
2066*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2067*4882a593Smuzhiyun return -EINVAL;
2068*4882a593Smuzhiyun }
2069*4882a593Smuzhiyun
2070*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2071*4882a593Smuzhiyun
2072*4882a593Smuzhiyun return 0;
2073*4882a593Smuzhiyun }
2074*4882a593Smuzhiyun
2075*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbasep_find_enclosing_gpu_mapping_start_and_offset);
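/*
 * Worked example (illustrative values): for a region starting at GPU PFN R
 * and covering N pages, a query with gpu_addr == (R << PAGE_SHIFT) + 0x40
 * and a size that stays within the region returns
 * *start == (R << PAGE_SHIFT) and *offset == 0x40; a range extending past
 * the region end fails with -EINVAL.
 */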
2076*4882a593Smuzhiyun
2077*4882a593Smuzhiyun void kbase_sync_single(struct kbase_context *kctx,
2078*4882a593Smuzhiyun struct tagged_addr t_cpu_pa, struct tagged_addr t_gpu_pa,
2079*4882a593Smuzhiyun off_t offset, size_t size, enum kbase_sync_type sync_fn)
2080*4882a593Smuzhiyun {
2081*4882a593Smuzhiyun struct page *cpu_page;
2082*4882a593Smuzhiyun phys_addr_t cpu_pa = as_phys_addr_t(t_cpu_pa);
2083*4882a593Smuzhiyun phys_addr_t gpu_pa = as_phys_addr_t(t_gpu_pa);
2084*4882a593Smuzhiyun
2085*4882a593Smuzhiyun cpu_page = pfn_to_page(PFN_DOWN(cpu_pa));
2086*4882a593Smuzhiyun
2087*4882a593Smuzhiyun if (likely(cpu_pa == gpu_pa)) {
2088*4882a593Smuzhiyun dma_addr_t dma_addr;
2089*4882a593Smuzhiyun
2090*4882a593Smuzhiyun BUG_ON(!cpu_page);
2091*4882a593Smuzhiyun BUG_ON(offset + size > PAGE_SIZE);
2092*4882a593Smuzhiyun
2093*4882a593Smuzhiyun dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + offset;
2094*4882a593Smuzhiyun
2095*4882a593Smuzhiyun if (sync_fn == KBASE_SYNC_TO_CPU)
2096*4882a593Smuzhiyun dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr,
2097*4882a593Smuzhiyun size, DMA_BIDIRECTIONAL);
2098*4882a593Smuzhiyun else if (sync_fn == KBASE_SYNC_TO_DEVICE)
2099*4882a593Smuzhiyun dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
2100*4882a593Smuzhiyun size, DMA_BIDIRECTIONAL);
2101*4882a593Smuzhiyun } else {
2102*4882a593Smuzhiyun void *src = NULL;
2103*4882a593Smuzhiyun void *dst = NULL;
2104*4882a593Smuzhiyun struct page *gpu_page;
2105*4882a593Smuzhiyun dma_addr_t dma_addr;
2106*4882a593Smuzhiyun
2107*4882a593Smuzhiyun if (WARN(!gpu_pa, "No GPU PA found for infinite cache op"))
2108*4882a593Smuzhiyun return;
2109*4882a593Smuzhiyun
2110*4882a593Smuzhiyun gpu_page = pfn_to_page(PFN_DOWN(gpu_pa));
2111*4882a593Smuzhiyun dma_addr = kbase_dma_addr_from_tagged(t_gpu_pa) + offset;
2112*4882a593Smuzhiyun
2113*4882a593Smuzhiyun if (sync_fn == KBASE_SYNC_TO_DEVICE) {
2114*4882a593Smuzhiyun src = ((unsigned char *)kmap(cpu_page)) + offset;
2115*4882a593Smuzhiyun dst = ((unsigned char *)kmap(gpu_page)) + offset;
2116*4882a593Smuzhiyun } else if (sync_fn == KBASE_SYNC_TO_CPU) {
2117*4882a593Smuzhiyun dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, size,
2118*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
2119*4882a593Smuzhiyun src = ((unsigned char *)kmap(gpu_page)) + offset;
2120*4882a593Smuzhiyun dst = ((unsigned char *)kmap(cpu_page)) + offset;
2121*4882a593Smuzhiyun }
2122*4882a593Smuzhiyun
2123*4882a593Smuzhiyun memcpy(dst, src, size);
2124*4882a593Smuzhiyun kunmap(gpu_page);
2125*4882a593Smuzhiyun kunmap(cpu_page);
2126*4882a593Smuzhiyun if (sync_fn == KBASE_SYNC_TO_DEVICE)
2127*4882a593Smuzhiyun dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, size,
2128*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
2129*4882a593Smuzhiyun }
2130*4882a593Smuzhiyun }
2131*4882a593Smuzhiyun
2132*4882a593Smuzhiyun static int kbase_do_syncset(struct kbase_context *kctx,
2133*4882a593Smuzhiyun struct basep_syncset *sset, enum kbase_sync_type sync_fn)
2134*4882a593Smuzhiyun {
2135*4882a593Smuzhiyun int err = 0;
2136*4882a593Smuzhiyun struct kbase_va_region *reg;
2137*4882a593Smuzhiyun struct kbase_cpu_mapping *map;
2138*4882a593Smuzhiyun unsigned long start;
2139*4882a593Smuzhiyun size_t size;
2140*4882a593Smuzhiyun struct tagged_addr *cpu_pa;
2141*4882a593Smuzhiyun struct tagged_addr *gpu_pa;
2142*4882a593Smuzhiyun u64 page_off, page_count;
2143*4882a593Smuzhiyun u64 i;
2144*4882a593Smuzhiyun u64 offset;
2145*4882a593Smuzhiyun
2146*4882a593Smuzhiyun kbase_os_mem_map_lock(kctx);
2147*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
2148*4882a593Smuzhiyun
2149*4882a593Smuzhiyun /* find the region where the virtual address is contained */
2150*4882a593Smuzhiyun reg = kbase_region_tracker_find_region_enclosing_address(kctx,
2151*4882a593Smuzhiyun sset->mem_handle.basep.handle);
2152*4882a593Smuzhiyun if (kbase_is_region_invalid_or_free(reg)) {
2153*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "Can't find a valid region at VA 0x%016llX",
2154*4882a593Smuzhiyun sset->mem_handle.basep.handle);
2155*4882a593Smuzhiyun err = -EINVAL;
2156*4882a593Smuzhiyun goto out_unlock;
2157*4882a593Smuzhiyun }
2158*4882a593Smuzhiyun
2159*4882a593Smuzhiyun /*
2160*4882a593Smuzhiyun * Handle imported memory before checking for KBASE_REG_CPU_CACHED. The
2161*4882a593Smuzhiyun * CPU mapping cacheability is defined by the owner of the imported
2162*4882a593Smuzhiyun * memory, and not by kbase, therefore we must assume that any imported
2163*4882a593Smuzhiyun * memory may be cached.
2164*4882a593Smuzhiyun */
2165*4882a593Smuzhiyun if (kbase_mem_is_imported(reg->gpu_alloc->type)) {
2166*4882a593Smuzhiyun err = kbase_mem_do_sync_imported(kctx, reg, sync_fn);
2167*4882a593Smuzhiyun goto out_unlock;
2168*4882a593Smuzhiyun }
2169*4882a593Smuzhiyun
2170*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_CPU_CACHED))
2171*4882a593Smuzhiyun goto out_unlock;
2172*4882a593Smuzhiyun
2173*4882a593Smuzhiyun start = (uintptr_t)sset->user_addr;
2174*4882a593Smuzhiyun size = (size_t)sset->size;
2175*4882a593Smuzhiyun
2176*4882a593Smuzhiyun map = kbasep_find_enclosing_cpu_mapping(kctx, start, size, &offset);
2177*4882a593Smuzhiyun if (!map) {
2178*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "Can't find CPU mapping 0x%016lX for VA 0x%016llX",
2179*4882a593Smuzhiyun start, sset->mem_handle.basep.handle);
2180*4882a593Smuzhiyun err = -EINVAL;
2181*4882a593Smuzhiyun goto out_unlock;
2182*4882a593Smuzhiyun }
2183*4882a593Smuzhiyun
2184*4882a593Smuzhiyun page_off = offset >> PAGE_SHIFT;
2185*4882a593Smuzhiyun offset &= ~PAGE_MASK;
2186*4882a593Smuzhiyun page_count = (size + offset + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
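	/*
	 * Illustrative example (not from the original source): with a 4096
	 * byte PAGE_SIZE, a sync of 10000 bytes whose start lies 0x100 bytes
	 * into a page gives offset = 0x100 and page_count = 3 after the lines
	 * above. The first page then syncs 4096 - 0x100 = 3840 bytes, the
	 * middle page syncs a full 4096 bytes and the last page syncs the
	 * remaining 2064 bytes.
	 */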
2187*4882a593Smuzhiyun cpu_pa = kbase_get_cpu_phy_pages(reg);
2188*4882a593Smuzhiyun gpu_pa = kbase_get_gpu_phy_pages(reg);
2189*4882a593Smuzhiyun
2190*4882a593Smuzhiyun if (page_off > reg->nr_pages ||
2191*4882a593Smuzhiyun page_off + page_count > reg->nr_pages) {
2192*4882a593Smuzhiyun /* Sync overflows the region */
2193*4882a593Smuzhiyun err = -EINVAL;
2194*4882a593Smuzhiyun goto out_unlock;
2195*4882a593Smuzhiyun }
2196*4882a593Smuzhiyun
2197*4882a593Smuzhiyun /* Sync first page */
2198*4882a593Smuzhiyun if (as_phys_addr_t(cpu_pa[page_off])) {
2199*4882a593Smuzhiyun size_t sz = MIN(((size_t) PAGE_SIZE - offset), size);
2200*4882a593Smuzhiyun
2201*4882a593Smuzhiyun kbase_sync_single(kctx, cpu_pa[page_off], gpu_pa[page_off],
2202*4882a593Smuzhiyun offset, sz, sync_fn);
2203*4882a593Smuzhiyun }
2204*4882a593Smuzhiyun
2205*4882a593Smuzhiyun /* Sync middle pages (if any) */
2206*4882a593Smuzhiyun for (i = 1; page_count > 2 && i < page_count - 1; i++) {
2207*4882a593Smuzhiyun /* we grow upwards, so bail on first non-present page */
2208*4882a593Smuzhiyun if (!as_phys_addr_t(cpu_pa[page_off + i]))
2209*4882a593Smuzhiyun break;
2210*4882a593Smuzhiyun
2211*4882a593Smuzhiyun kbase_sync_single(kctx, cpu_pa[page_off + i],
2212*4882a593Smuzhiyun gpu_pa[page_off + i], 0, PAGE_SIZE, sync_fn);
2213*4882a593Smuzhiyun }
2214*4882a593Smuzhiyun
2215*4882a593Smuzhiyun /* Sync last page (if any) */
2216*4882a593Smuzhiyun if (page_count > 1 &&
2217*4882a593Smuzhiyun as_phys_addr_t(cpu_pa[page_off + page_count - 1])) {
2218*4882a593Smuzhiyun size_t sz = ((start + size - 1) & ~PAGE_MASK) + 1;
2219*4882a593Smuzhiyun
2220*4882a593Smuzhiyun kbase_sync_single(kctx, cpu_pa[page_off + page_count - 1],
2221*4882a593Smuzhiyun gpu_pa[page_off + page_count - 1], 0, sz,
2222*4882a593Smuzhiyun sync_fn);
2223*4882a593Smuzhiyun }
2224*4882a593Smuzhiyun
2225*4882a593Smuzhiyun out_unlock:
2226*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2227*4882a593Smuzhiyun kbase_os_mem_map_unlock(kctx);
2228*4882a593Smuzhiyun return err;
2229*4882a593Smuzhiyun }
2230*4882a593Smuzhiyun
2231*4882a593Smuzhiyun int kbase_sync_now(struct kbase_context *kctx, struct basep_syncset *sset)
2232*4882a593Smuzhiyun {
2233*4882a593Smuzhiyun int err = -EINVAL;
2234*4882a593Smuzhiyun
2235*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
2236*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(sset != NULL);
2237*4882a593Smuzhiyun
2238*4882a593Smuzhiyun if (sset->mem_handle.basep.handle & ~PAGE_MASK) {
2239*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev,
2240*4882a593Smuzhiyun "mem_handle: passed parameter is invalid");
2241*4882a593Smuzhiyun return -EINVAL;
2242*4882a593Smuzhiyun }
2243*4882a593Smuzhiyun
2244*4882a593Smuzhiyun switch (sset->type) {
2245*4882a593Smuzhiyun case BASE_SYNCSET_OP_MSYNC:
2246*4882a593Smuzhiyun err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_DEVICE);
2247*4882a593Smuzhiyun break;
2248*4882a593Smuzhiyun
2249*4882a593Smuzhiyun case BASE_SYNCSET_OP_CSYNC:
2250*4882a593Smuzhiyun err = kbase_do_syncset(kctx, sset, KBASE_SYNC_TO_CPU);
2251*4882a593Smuzhiyun break;
2252*4882a593Smuzhiyun
2253*4882a593Smuzhiyun default:
2254*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "Unknown msync op %d\n", sset->type);
2255*4882a593Smuzhiyun break;
2256*4882a593Smuzhiyun }
2257*4882a593Smuzhiyun
2258*4882a593Smuzhiyun return err;
2259*4882a593Smuzhiyun }
2260*4882a593Smuzhiyun
2261*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_sync_now);
2262*4882a593Smuzhiyun
2263*4882a593Smuzhiyun /* vm lock must be held */
2264*4882a593Smuzhiyun int kbase_mem_free_region(struct kbase_context *kctx, struct kbase_va_region *reg)
2265*4882a593Smuzhiyun {
2266*4882a593Smuzhiyun int err;
2267*4882a593Smuzhiyun
2268*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
2269*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(reg != NULL);
2270*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev, "%s %pK in kctx %pK\n",
2271*4882a593Smuzhiyun __func__, (void *)reg, (void *)kctx);
2272*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
2273*4882a593Smuzhiyun
2274*4882a593Smuzhiyun if (kbase_va_region_is_no_user_free(reg)) {
2275*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "Attempt to free GPU memory whose freeing by user space is forbidden!\n");
2276*4882a593Smuzhiyun return -EINVAL;
2277*4882a593Smuzhiyun }
2278*4882a593Smuzhiyun
2279*4882a593Smuzhiyun /* If a region has been made evictable then we must unmake it
2280*4882a593Smuzhiyun * before trying to free it.
2281*4882a593Smuzhiyun * If the memory hasn't been reclaimed it will be unmapped and freed
2282*4882a593Smuzhiyun * below; if it has been reclaimed then the operations below are no-ops.
2283*4882a593Smuzhiyun */
2284*4882a593Smuzhiyun if (reg->flags & KBASE_REG_DONT_NEED) {
2285*4882a593Smuzhiyun WARN_ON(reg->cpu_alloc->type != KBASE_MEM_TYPE_NATIVE);
2286*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
2287*4882a593Smuzhiyun /* Unlink the physical allocation before unmaking it evictable so
2288*4882a593Smuzhiyun * that the allocation isn't grown back to its last backed size
2289*4882a593Smuzhiyun * as we're going to unmap it anyway.
2290*4882a593Smuzhiyun */
2291*4882a593Smuzhiyun reg->cpu_alloc->reg = NULL;
2292*4882a593Smuzhiyun if (reg->cpu_alloc != reg->gpu_alloc)
2293*4882a593Smuzhiyun reg->gpu_alloc->reg = NULL;
2294*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
2295*4882a593Smuzhiyun kbase_mem_evictable_unmake(reg->gpu_alloc);
2296*4882a593Smuzhiyun }
2297*4882a593Smuzhiyun
2298*4882a593Smuzhiyun err = kbase_gpu_munmap(kctx, reg);
2299*4882a593Smuzhiyun if (err) {
2300*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "Could not unmap from the GPU...\n");
2301*4882a593Smuzhiyun goto out;
2302*4882a593Smuzhiyun }
2303*4882a593Smuzhiyun
2304*4882a593Smuzhiyun #if MALI_USE_CSF
2305*4882a593Smuzhiyun if (((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_FIXED_VA) ||
2306*4882a593Smuzhiyun ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_EXEC_FIXED_VA)) {
2307*4882a593Smuzhiyun if (reg->flags & KBASE_REG_FIXED_ADDRESS)
2308*4882a593Smuzhiyun atomic64_dec(&kctx->num_fixed_allocs);
2309*4882a593Smuzhiyun else
2310*4882a593Smuzhiyun atomic64_dec(&kctx->num_fixable_allocs);
2311*4882a593Smuzhiyun }
2312*4882a593Smuzhiyun #endif
2313*4882a593Smuzhiyun
2314*4882a593Smuzhiyun /* This will also free the physical pages */
2315*4882a593Smuzhiyun kbase_free_alloced_region(reg);
2316*4882a593Smuzhiyun
2317*4882a593Smuzhiyun out:
2318*4882a593Smuzhiyun return err;
2319*4882a593Smuzhiyun }
2320*4882a593Smuzhiyun
2321*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_mem_free_region);
2322*4882a593Smuzhiyun
2323*4882a593Smuzhiyun /**
2324*4882a593Smuzhiyun * kbase_mem_free - Free the region from the GPU and unregister it.
2325*4882a593Smuzhiyun *
2326*4882a593Smuzhiyun * @kctx: KBase context
2327*4882a593Smuzhiyun * @gpu_addr: GPU address to free
2328*4882a593Smuzhiyun *
2329*4882a593Smuzhiyun * This function implements the free operation on a memory segment.
2330*4882a593Smuzhiyun * It will loudly fail if called with outstanding mappings.
2331*4882a593Smuzhiyun *
2332*4882a593Smuzhiyun * Return: 0 on success.
2333*4882a593Smuzhiyun */
2334*4882a593Smuzhiyun int kbase_mem_free(struct kbase_context *kctx, u64 gpu_addr)
2335*4882a593Smuzhiyun {
2336*4882a593Smuzhiyun int err = 0;
2337*4882a593Smuzhiyun struct kbase_va_region *reg;
2338*4882a593Smuzhiyun
2339*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
2340*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev, "%s 0x%llx in kctx %pK\n",
2341*4882a593Smuzhiyun __func__, gpu_addr, (void *)kctx);
2342*4882a593Smuzhiyun
2343*4882a593Smuzhiyun if ((gpu_addr & ~PAGE_MASK) && (gpu_addr >= PAGE_SIZE)) {
2344*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "%s: gpu_addr parameter is invalid", __func__);
2345*4882a593Smuzhiyun return -EINVAL;
2346*4882a593Smuzhiyun }
2347*4882a593Smuzhiyun
2348*4882a593Smuzhiyun if (gpu_addr == 0) {
2349*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev,
2350*4882a593Smuzhiyun "gpu_addr 0 is reserved for the ringbuffer and it's an error to try to free it using %s\n",
2351*4882a593Smuzhiyun __func__);
2352*4882a593Smuzhiyun return -EINVAL;
2353*4882a593Smuzhiyun }
2354*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
2355*4882a593Smuzhiyun
2356*4882a593Smuzhiyun if (gpu_addr >= BASE_MEM_COOKIE_BASE &&
2357*4882a593Smuzhiyun gpu_addr < BASE_MEM_FIRST_FREE_ADDRESS) {
2358*4882a593Smuzhiyun int cookie = PFN_DOWN(gpu_addr - BASE_MEM_COOKIE_BASE);
2359*4882a593Smuzhiyun
2360*4882a593Smuzhiyun reg = kctx->pending_regions[cookie];
2361*4882a593Smuzhiyun if (!reg) {
2362*4882a593Smuzhiyun err = -EINVAL;
2363*4882a593Smuzhiyun goto out_unlock;
2364*4882a593Smuzhiyun }
2365*4882a593Smuzhiyun
2366*4882a593Smuzhiyun /* ask to unlink the cookie as we'll free it */
2367*4882a593Smuzhiyun
2368*4882a593Smuzhiyun kctx->pending_regions[cookie] = NULL;
2369*4882a593Smuzhiyun bitmap_set(kctx->cookies, cookie, 1);
2370*4882a593Smuzhiyun
2371*4882a593Smuzhiyun kbase_free_alloced_region(reg);
2372*4882a593Smuzhiyun } else {
2373*4882a593Smuzhiyun /* A real GPU va */
2374*4882a593Smuzhiyun /* Validate the region */
2375*4882a593Smuzhiyun reg = kbase_region_tracker_find_region_base_address(kctx, gpu_addr);
2376*4882a593Smuzhiyun if (kbase_is_region_invalid_or_free(reg)) {
2377*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "%s called with nonexistent gpu_addr 0x%llX",
2378*4882a593Smuzhiyun __func__, gpu_addr);
2379*4882a593Smuzhiyun err = -EINVAL;
2380*4882a593Smuzhiyun goto out_unlock;
2381*4882a593Smuzhiyun }
2382*4882a593Smuzhiyun
2383*4882a593Smuzhiyun if ((reg->flags & KBASE_REG_ZONE_MASK) == KBASE_REG_ZONE_SAME_VA) {
2384*4882a593Smuzhiyun /* SAME_VA must be freed through munmap */
2385*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev, "%s called on SAME_VA memory 0x%llX", __func__,
2386*4882a593Smuzhiyun gpu_addr);
2387*4882a593Smuzhiyun err = -EINVAL;
2388*4882a593Smuzhiyun goto out_unlock;
2389*4882a593Smuzhiyun }
2390*4882a593Smuzhiyun err = kbase_mem_free_region(kctx, reg);
2391*4882a593Smuzhiyun }
2392*4882a593Smuzhiyun
2393*4882a593Smuzhiyun out_unlock:
2394*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2395*4882a593Smuzhiyun return err;
2396*4882a593Smuzhiyun }
2397*4882a593Smuzhiyun
2398*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_mem_free);
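/*
 * Minimal usage sketch (illustrative only, not part of the original source;
 * the caller name is hypothetical): given a context and a GPU VA returned by
 * an earlier allocation, the region is released as follows.
 *
 *	static int example_release_buffer(struct kbase_context *kctx, u64 gpu_va)
 *	{
 *		int err = kbase_mem_free(kctx, gpu_va);
 *
 *		if (err)
 *			dev_warn(kctx->kbdev->dev, "free of 0x%llx failed: %d\n",
 *				 gpu_va, err);
 *		return err;
 *	}
 */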
2399*4882a593Smuzhiyun
2400*4882a593Smuzhiyun int kbase_update_region_flags(struct kbase_context *kctx,
2401*4882a593Smuzhiyun struct kbase_va_region *reg, unsigned long flags)
2402*4882a593Smuzhiyun {
2403*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(reg != NULL);
2404*4882a593Smuzhiyun KBASE_DEBUG_ASSERT((flags & ~((1ul << BASE_MEM_FLAGS_NR_BITS) - 1)) == 0);
2405*4882a593Smuzhiyun
2406*4882a593Smuzhiyun reg->flags |= kbase_cache_enabled(flags, reg->nr_pages);
2407*4882a593Smuzhiyun /* all memory is now growable */
2408*4882a593Smuzhiyun reg->flags |= KBASE_REG_GROWABLE;
2409*4882a593Smuzhiyun
2410*4882a593Smuzhiyun if (flags & BASE_MEM_GROW_ON_GPF)
2411*4882a593Smuzhiyun reg->flags |= KBASE_REG_PF_GROW;
2412*4882a593Smuzhiyun
2413*4882a593Smuzhiyun if (flags & BASE_MEM_PROT_CPU_WR)
2414*4882a593Smuzhiyun reg->flags |= KBASE_REG_CPU_WR;
2415*4882a593Smuzhiyun
2416*4882a593Smuzhiyun if (flags & BASE_MEM_PROT_CPU_RD)
2417*4882a593Smuzhiyun reg->flags |= KBASE_REG_CPU_RD;
2418*4882a593Smuzhiyun
2419*4882a593Smuzhiyun if (flags & BASE_MEM_PROT_GPU_WR)
2420*4882a593Smuzhiyun reg->flags |= KBASE_REG_GPU_WR;
2421*4882a593Smuzhiyun
2422*4882a593Smuzhiyun if (flags & BASE_MEM_PROT_GPU_RD)
2423*4882a593Smuzhiyun reg->flags |= KBASE_REG_GPU_RD;
2424*4882a593Smuzhiyun
2425*4882a593Smuzhiyun if (0 == (flags & BASE_MEM_PROT_GPU_EX))
2426*4882a593Smuzhiyun reg->flags |= KBASE_REG_GPU_NX;
2427*4882a593Smuzhiyun
2428*4882a593Smuzhiyun if (!kbase_device_is_cpu_coherent(kctx->kbdev)) {
2429*4882a593Smuzhiyun if (flags & BASE_MEM_COHERENT_SYSTEM_REQUIRED &&
2430*4882a593Smuzhiyun !(flags & BASE_MEM_UNCACHED_GPU))
2431*4882a593Smuzhiyun return -EINVAL;
2432*4882a593Smuzhiyun } else if (flags & (BASE_MEM_COHERENT_SYSTEM |
2433*4882a593Smuzhiyun BASE_MEM_COHERENT_SYSTEM_REQUIRED)) {
2434*4882a593Smuzhiyun reg->flags |= KBASE_REG_SHARE_BOTH;
2435*4882a593Smuzhiyun }
2436*4882a593Smuzhiyun
2437*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_SHARE_BOTH) &&
2438*4882a593Smuzhiyun flags & BASE_MEM_COHERENT_LOCAL) {
2439*4882a593Smuzhiyun reg->flags |= KBASE_REG_SHARE_IN;
2440*4882a593Smuzhiyun }
2441*4882a593Smuzhiyun
2442*4882a593Smuzhiyun #if !MALI_USE_CSF
2443*4882a593Smuzhiyun if (flags & BASE_MEM_TILER_ALIGN_TOP)
2444*4882a593Smuzhiyun reg->flags |= KBASE_REG_TILER_ALIGN_TOP;
2445*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
2446*4882a593Smuzhiyun
2447*4882a593Smuzhiyun #if MALI_USE_CSF
2448*4882a593Smuzhiyun if (flags & BASE_MEM_CSF_EVENT) {
2449*4882a593Smuzhiyun reg->flags |= KBASE_REG_CSF_EVENT;
2450*4882a593Smuzhiyun reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
2451*4882a593Smuzhiyun
2452*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_SHARE_BOTH)) {
2453*4882a593Smuzhiyun /* On non-coherent platforms we need to map as uncached on
2454*4882a593Smuzhiyun * both sides.
2455*4882a593Smuzhiyun */
2456*4882a593Smuzhiyun reg->flags &= ~KBASE_REG_CPU_CACHED;
2457*4882a593Smuzhiyun reg->flags &= ~KBASE_REG_GPU_CACHED;
2458*4882a593Smuzhiyun }
2459*4882a593Smuzhiyun }
2460*4882a593Smuzhiyun #endif
2461*4882a593Smuzhiyun
2462*4882a593Smuzhiyun /* Set up default MEMATTR usage */
2463*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_GPU_CACHED)) {
2464*4882a593Smuzhiyun if (kctx->kbdev->mmu_mode->flags &
2465*4882a593Smuzhiyun KBASE_MMU_MODE_HAS_NON_CACHEABLE) {
2466*4882a593Smuzhiyun /* Override shareability, and MEMATTR for uncached */
2467*4882a593Smuzhiyun reg->flags &= ~(KBASE_REG_SHARE_IN | KBASE_REG_SHARE_BOTH);
2468*4882a593Smuzhiyun reg->flags |= KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_NON_CACHEABLE);
2469*4882a593Smuzhiyun } else {
2470*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev,
2471*4882a593Smuzhiyun "Can't allocate GPU uncached memory due to MMU in Legacy Mode\n");
2472*4882a593Smuzhiyun return -EINVAL;
2473*4882a593Smuzhiyun }
2474*4882a593Smuzhiyun #if MALI_USE_CSF
2475*4882a593Smuzhiyun } else if (reg->flags & KBASE_REG_CSF_EVENT) {
2476*4882a593Smuzhiyun WARN_ON(!(reg->flags & KBASE_REG_SHARE_BOTH));
2477*4882a593Smuzhiyun
2478*4882a593Smuzhiyun reg->flags |=
2479*4882a593Smuzhiyun KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_SHARED);
2480*4882a593Smuzhiyun #endif
2481*4882a593Smuzhiyun } else if (kctx->kbdev->system_coherency == COHERENCY_ACE &&
2482*4882a593Smuzhiyun (reg->flags & KBASE_REG_SHARE_BOTH)) {
2483*4882a593Smuzhiyun reg->flags |=
2484*4882a593Smuzhiyun KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT_ACE);
2485*4882a593Smuzhiyun } else {
2486*4882a593Smuzhiyun reg->flags |=
2487*4882a593Smuzhiyun KBASE_REG_MEMATTR_INDEX(AS_MEMATTR_INDEX_DEFAULT);
2488*4882a593Smuzhiyun }
2489*4882a593Smuzhiyun
2490*4882a593Smuzhiyun if (flags & BASEP_MEM_PERMANENT_KERNEL_MAPPING)
2491*4882a593Smuzhiyun reg->flags |= KBASE_REG_PERMANENT_KERNEL_MAPPING;
2492*4882a593Smuzhiyun
2493*4882a593Smuzhiyun if (flags & BASEP_MEM_NO_USER_FREE) {
2494*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
2495*4882a593Smuzhiyun kbase_va_region_no_user_free_inc(reg);
2496*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
2497*4882a593Smuzhiyun }
2498*4882a593Smuzhiyun
2499*4882a593Smuzhiyun if (flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE)
2500*4882a593Smuzhiyun reg->flags |= KBASE_REG_GPU_VA_SAME_4GB_PAGE;
2501*4882a593Smuzhiyun
2502*4882a593Smuzhiyun #if MALI_USE_CSF
2503*4882a593Smuzhiyun if (flags & BASE_MEM_FIXED)
2504*4882a593Smuzhiyun reg->flags |= KBASE_REG_FIXED_ADDRESS;
2505*4882a593Smuzhiyun #endif
2506*4882a593Smuzhiyun
2507*4882a593Smuzhiyun return 0;
2508*4882a593Smuzhiyun }
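/*
 * Illustrative example for kbase_update_region_flags() above (not from the
 * original source): passing BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
 * BASE_MEM_PROT_GPU_WR results in reg->flags gaining KBASE_REG_GROWABLE,
 * KBASE_REG_CPU_RD, KBASE_REG_GPU_RD, KBASE_REG_GPU_WR and KBASE_REG_GPU_NX;
 * the cacheability and MEMATTR bits then depend on kbase_cache_enabled() and
 * on the device's coherency configuration.
 */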
2509*4882a593Smuzhiyun
2510*4882a593Smuzhiyun int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
2511*4882a593Smuzhiyun size_t nr_pages_requested)
2512*4882a593Smuzhiyun {
2513*4882a593Smuzhiyun int new_page_count __maybe_unused;
2514*4882a593Smuzhiyun size_t nr_left = nr_pages_requested;
2515*4882a593Smuzhiyun int res;
2516*4882a593Smuzhiyun struct kbase_context *kctx;
2517*4882a593Smuzhiyun struct kbase_device *kbdev;
2518*4882a593Smuzhiyun struct tagged_addr *tp;
2519*4882a593Smuzhiyun
2520*4882a593Smuzhiyun if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
2521*4882a593Smuzhiyun WARN_ON(alloc->imported.native.kctx == NULL) ||
2522*4882a593Smuzhiyun WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
2523*4882a593Smuzhiyun return -EINVAL;
2524*4882a593Smuzhiyun }
2525*4882a593Smuzhiyun
2526*4882a593Smuzhiyun if (alloc->reg) {
2527*4882a593Smuzhiyun if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
2528*4882a593Smuzhiyun goto invalid_request;
2529*4882a593Smuzhiyun }
2530*4882a593Smuzhiyun
2531*4882a593Smuzhiyun kctx = alloc->imported.native.kctx;
2532*4882a593Smuzhiyun kbdev = kctx->kbdev;
2533*4882a593Smuzhiyun
2534*4882a593Smuzhiyun if (nr_pages_requested == 0)
2535*4882a593Smuzhiyun goto done; /* nothing to do */
2536*4882a593Smuzhiyun
2537*4882a593Smuzhiyun new_page_count = atomic_add_return(
2538*4882a593Smuzhiyun nr_pages_requested, &kctx->used_pages);
2539*4882a593Smuzhiyun atomic_add(nr_pages_requested,
2540*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
2541*4882a593Smuzhiyun
2542*4882a593Smuzhiyun /* Increase mm counters before we allocate pages so that this
2543*4882a593Smuzhiyun * allocation is visible to the OOM killer
2544*4882a593Smuzhiyun */
2545*4882a593Smuzhiyun kbase_process_page_usage_inc(kctx, nr_pages_requested);
2546*4882a593Smuzhiyun
2547*4882a593Smuzhiyun tp = alloc->pages + alloc->nents;
2548*4882a593Smuzhiyun
2549*4882a593Smuzhiyun /* Check whether enough pages have been requested to allocate at least
2550*4882a593Smuzhiyun * one large page (512 * 4KB = 2MB).
2551*4882a593Smuzhiyun */
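	/*
	 * Worked example (illustrative, not from the original source): a
	 * request for 1100 pages gives nr_lp = 2, so up to 1024 pages are
	 * taken as two 2MB pages from the large pool; the remaining 76 pages
	 * are then taken from existing partial 2MB pages or a freshly
	 * allocated partial before falling back to the 4KB pool.
	 */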
2552*4882a593Smuzhiyun if (kbdev->pagesize_2mb && nr_left >= (SZ_2M / SZ_4K)) {
2553*4882a593Smuzhiyun int nr_lp = nr_left / (SZ_2M / SZ_4K);
2554*4882a593Smuzhiyun
2555*4882a593Smuzhiyun res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
2556*4882a593Smuzhiyun nr_lp * (SZ_2M / SZ_4K), tp, true, kctx->task);
2557*4882a593Smuzhiyun
2558*4882a593Smuzhiyun if (res > 0) {
2559*4882a593Smuzhiyun nr_left -= res;
2560*4882a593Smuzhiyun tp += res;
2561*4882a593Smuzhiyun }
2562*4882a593Smuzhiyun
2563*4882a593Smuzhiyun if (nr_left) {
2564*4882a593Smuzhiyun struct kbase_sub_alloc *sa, *temp_sa;
2565*4882a593Smuzhiyun
2566*4882a593Smuzhiyun spin_lock(&kctx->mem_partials_lock);
2567*4882a593Smuzhiyun
2568*4882a593Smuzhiyun list_for_each_entry_safe(sa, temp_sa,
2569*4882a593Smuzhiyun &kctx->mem_partials, link) {
2570*4882a593Smuzhiyun int pidx = 0;
2571*4882a593Smuzhiyun
2572*4882a593Smuzhiyun while (nr_left) {
2573*4882a593Smuzhiyun pidx = find_next_zero_bit(sa->sub_pages,
2574*4882a593Smuzhiyun SZ_2M / SZ_4K,
2575*4882a593Smuzhiyun pidx);
2576*4882a593Smuzhiyun bitmap_set(sa->sub_pages, pidx, 1);
2577*4882a593Smuzhiyun *tp++ = as_tagged_tag(page_to_phys(sa->page +
2578*4882a593Smuzhiyun pidx),
2579*4882a593Smuzhiyun FROM_PARTIAL);
2580*4882a593Smuzhiyun nr_left--;
2581*4882a593Smuzhiyun
2582*4882a593Smuzhiyun if (bitmap_full(sa->sub_pages, SZ_2M / SZ_4K)) {
2583*4882a593Smuzhiyun /* unlink from partial list when full */
2584*4882a593Smuzhiyun list_del_init(&sa->link);
2585*4882a593Smuzhiyun break;
2586*4882a593Smuzhiyun }
2587*4882a593Smuzhiyun }
2588*4882a593Smuzhiyun }
2589*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
2590*4882a593Smuzhiyun }
2591*4882a593Smuzhiyun
2592*4882a593Smuzhiyun /* Only if the remaining chunk is smaller than 512 pages; if more is left
2593*4882a593Smuzhiyun * it means we couldn't allocate a 2MB page above, so don't retry here.
2594*4882a593Smuzhiyun */
2595*4882a593Smuzhiyun if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
2596*4882a593Smuzhiyun /* create a new partial and suballocate the rest from it */
2597*4882a593Smuzhiyun struct page *np = NULL;
2598*4882a593Smuzhiyun
2599*4882a593Smuzhiyun do {
2600*4882a593Smuzhiyun int err;
2601*4882a593Smuzhiyun
2602*4882a593Smuzhiyun np = kbase_mem_pool_alloc(
2603*4882a593Smuzhiyun &kctx->mem_pools.large[
2604*4882a593Smuzhiyun alloc->group_id]);
2605*4882a593Smuzhiyun if (np)
2606*4882a593Smuzhiyun break;
2607*4882a593Smuzhiyun
2608*4882a593Smuzhiyun err = kbase_mem_pool_grow(
2609*4882a593Smuzhiyun &kctx->mem_pools.large[alloc->group_id],
2610*4882a593Smuzhiyun 1, kctx->task);
2611*4882a593Smuzhiyun if (err)
2612*4882a593Smuzhiyun break;
2613*4882a593Smuzhiyun } while (1);
2614*4882a593Smuzhiyun
2615*4882a593Smuzhiyun if (np) {
2616*4882a593Smuzhiyun int i;
2617*4882a593Smuzhiyun struct kbase_sub_alloc *sa;
2618*4882a593Smuzhiyun struct page *p;
2619*4882a593Smuzhiyun
2620*4882a593Smuzhiyun sa = kmalloc(sizeof(*sa), GFP_KERNEL);
2621*4882a593Smuzhiyun if (!sa) {
2622*4882a593Smuzhiyun kbase_mem_pool_free(
2623*4882a593Smuzhiyun &kctx->mem_pools.large[
2624*4882a593Smuzhiyun alloc->group_id],
2625*4882a593Smuzhiyun np,
2626*4882a593Smuzhiyun false);
2627*4882a593Smuzhiyun goto no_new_partial;
2628*4882a593Smuzhiyun }
2629*4882a593Smuzhiyun
2630*4882a593Smuzhiyun /* store pointers back to the control struct */
2631*4882a593Smuzhiyun np->lru.next = (void *)sa;
2632*4882a593Smuzhiyun for (p = np; p < np + SZ_2M / SZ_4K; p++)
2633*4882a593Smuzhiyun p->lru.prev = (void *)np;
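				/*
				 * After this, every 4KB sub-page's lru.prev
				 * points at the 2MB head page and the head
				 * page's lru.next points at the
				 * kbase_sub_alloc, which is how free_partial()
				 * later finds both from any sub-page.
				 */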
2634*4882a593Smuzhiyun INIT_LIST_HEAD(&sa->link);
2635*4882a593Smuzhiyun bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
2636*4882a593Smuzhiyun sa->page = np;
2637*4882a593Smuzhiyun
2638*4882a593Smuzhiyun for (i = 0; i < nr_left; i++)
2639*4882a593Smuzhiyun *tp++ = as_tagged_tag(page_to_phys(np + i), FROM_PARTIAL);
2640*4882a593Smuzhiyun
2641*4882a593Smuzhiyun bitmap_set(sa->sub_pages, 0, nr_left);
2642*4882a593Smuzhiyun nr_left = 0;
2643*4882a593Smuzhiyun
2644*4882a593Smuzhiyun /* expose for later use */
2645*4882a593Smuzhiyun spin_lock(&kctx->mem_partials_lock);
2646*4882a593Smuzhiyun list_add(&sa->link, &kctx->mem_partials);
2647*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
2648*4882a593Smuzhiyun }
2649*4882a593Smuzhiyun }
2650*4882a593Smuzhiyun }
2651*4882a593Smuzhiyun
2652*4882a593Smuzhiyun no_new_partial:
2653*4882a593Smuzhiyun if (nr_left) {
2654*4882a593Smuzhiyun res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.small[alloc->group_id], nr_left,
2655*4882a593Smuzhiyun tp, false, kctx->task);
2656*4882a593Smuzhiyun if (res <= 0)
2657*4882a593Smuzhiyun goto alloc_failed;
2658*4882a593Smuzhiyun }
2659*4882a593Smuzhiyun
2660*4882a593Smuzhiyun KBASE_TLSTREAM_AUX_PAGESALLOC(
2661*4882a593Smuzhiyun kbdev,
2662*4882a593Smuzhiyun kctx->id,
2663*4882a593Smuzhiyun (u64)new_page_count);
2664*4882a593Smuzhiyun
2665*4882a593Smuzhiyun alloc->nents += nr_pages_requested;
2666*4882a593Smuzhiyun
2667*4882a593Smuzhiyun kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
2668*4882a593Smuzhiyun
2669*4882a593Smuzhiyun done:
2670*4882a593Smuzhiyun return 0;
2671*4882a593Smuzhiyun
2672*4882a593Smuzhiyun alloc_failed:
2673*4882a593Smuzhiyun /* Rollback is needed if we got one or more 2MB pages but failed later */
2674*4882a593Smuzhiyun if (nr_left != nr_pages_requested) {
2675*4882a593Smuzhiyun size_t nr_pages_to_free = nr_pages_requested - nr_left;
2676*4882a593Smuzhiyun
2677*4882a593Smuzhiyun alloc->nents += nr_pages_to_free;
2678*4882a593Smuzhiyun
2679*4882a593Smuzhiyun kbase_process_page_usage_inc(kctx, nr_pages_to_free);
2680*4882a593Smuzhiyun atomic_add(nr_pages_to_free, &kctx->used_pages);
2681*4882a593Smuzhiyun atomic_add(nr_pages_to_free,
2682*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
2683*4882a593Smuzhiyun
2684*4882a593Smuzhiyun kbase_free_phy_pages_helper(alloc, nr_pages_to_free);
2685*4882a593Smuzhiyun }
2686*4882a593Smuzhiyun
2687*4882a593Smuzhiyun kbase_process_page_usage_dec(kctx, nr_pages_requested);
2688*4882a593Smuzhiyun atomic_sub(nr_pages_requested, &kctx->used_pages);
2689*4882a593Smuzhiyun atomic_sub(nr_pages_requested,
2690*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
2691*4882a593Smuzhiyun
2692*4882a593Smuzhiyun invalid_request:
2693*4882a593Smuzhiyun return -ENOMEM;
2694*4882a593Smuzhiyun }
2695*4882a593Smuzhiyun
2696*4882a593Smuzhiyun struct tagged_addr *kbase_alloc_phy_pages_helper_locked(
2697*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc, struct kbase_mem_pool *pool,
2698*4882a593Smuzhiyun size_t nr_pages_requested,
2699*4882a593Smuzhiyun struct kbase_sub_alloc **prealloc_sa)
2700*4882a593Smuzhiyun {
2701*4882a593Smuzhiyun int new_page_count __maybe_unused;
2702*4882a593Smuzhiyun size_t nr_left = nr_pages_requested;
2703*4882a593Smuzhiyun int res;
2704*4882a593Smuzhiyun struct kbase_context *kctx;
2705*4882a593Smuzhiyun struct kbase_device *kbdev;
2706*4882a593Smuzhiyun struct tagged_addr *tp;
2707*4882a593Smuzhiyun struct tagged_addr *new_pages = NULL;
2708*4882a593Smuzhiyun
2709*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
2710*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
2711*4882a593Smuzhiyun
2712*4882a593Smuzhiyun lockdep_assert_held(&pool->pool_lock);
2713*4882a593Smuzhiyun
2714*4882a593Smuzhiyun kctx = alloc->imported.native.kctx;
2715*4882a593Smuzhiyun kbdev = kctx->kbdev;
2716*4882a593Smuzhiyun
2717*4882a593Smuzhiyun if (!kbdev->pagesize_2mb)
2718*4882a593Smuzhiyun WARN_ON(pool->order);
2719*4882a593Smuzhiyun
2720*4882a593Smuzhiyun if (alloc->reg) {
2721*4882a593Smuzhiyun if (nr_pages_requested > alloc->reg->nr_pages - alloc->nents)
2722*4882a593Smuzhiyun goto invalid_request;
2723*4882a593Smuzhiyun }
2724*4882a593Smuzhiyun
2725*4882a593Smuzhiyun lockdep_assert_held(&kctx->mem_partials_lock);
2726*4882a593Smuzhiyun
2727*4882a593Smuzhiyun if (nr_pages_requested == 0)
2728*4882a593Smuzhiyun goto done; /* nothing to do */
2729*4882a593Smuzhiyun
2730*4882a593Smuzhiyun new_page_count = atomic_add_return(
2731*4882a593Smuzhiyun nr_pages_requested, &kctx->used_pages);
2732*4882a593Smuzhiyun atomic_add(nr_pages_requested,
2733*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
2734*4882a593Smuzhiyun
2735*4882a593Smuzhiyun /* Increase mm counters before we allocate pages so that this
2736*4882a593Smuzhiyun * allocation is visible to the OOM killer
2737*4882a593Smuzhiyun */
2738*4882a593Smuzhiyun kbase_process_page_usage_inc(kctx, nr_pages_requested);
2739*4882a593Smuzhiyun
2740*4882a593Smuzhiyun tp = alloc->pages + alloc->nents;
2741*4882a593Smuzhiyun new_pages = tp;
2742*4882a593Smuzhiyun
2743*4882a593Smuzhiyun if (kbdev->pagesize_2mb && pool->order) {
2744*4882a593Smuzhiyun int nr_lp = nr_left / (SZ_2M / SZ_4K);
2745*4882a593Smuzhiyun
2746*4882a593Smuzhiyun res = kbase_mem_pool_alloc_pages_locked(pool,
2747*4882a593Smuzhiyun nr_lp * (SZ_2M / SZ_4K),
2748*4882a593Smuzhiyun tp);
2749*4882a593Smuzhiyun
2750*4882a593Smuzhiyun if (res > 0) {
2751*4882a593Smuzhiyun nr_left -= res;
2752*4882a593Smuzhiyun tp += res;
2753*4882a593Smuzhiyun }
2754*4882a593Smuzhiyun
2755*4882a593Smuzhiyun if (nr_left) {
2756*4882a593Smuzhiyun struct kbase_sub_alloc *sa, *temp_sa;
2757*4882a593Smuzhiyun
2758*4882a593Smuzhiyun list_for_each_entry_safe(sa, temp_sa,
2759*4882a593Smuzhiyun &kctx->mem_partials, link) {
2760*4882a593Smuzhiyun int pidx = 0;
2761*4882a593Smuzhiyun
2762*4882a593Smuzhiyun while (nr_left) {
2763*4882a593Smuzhiyun pidx = find_next_zero_bit(sa->sub_pages,
2764*4882a593Smuzhiyun SZ_2M / SZ_4K,
2765*4882a593Smuzhiyun pidx);
2766*4882a593Smuzhiyun bitmap_set(sa->sub_pages, pidx, 1);
2767*4882a593Smuzhiyun *tp++ = as_tagged_tag(page_to_phys(
2768*4882a593Smuzhiyun sa->page + pidx),
2769*4882a593Smuzhiyun FROM_PARTIAL);
2770*4882a593Smuzhiyun nr_left--;
2771*4882a593Smuzhiyun
2772*4882a593Smuzhiyun if (bitmap_full(sa->sub_pages,
2773*4882a593Smuzhiyun SZ_2M / SZ_4K)) {
2774*4882a593Smuzhiyun /* unlink from partial list when
2775*4882a593Smuzhiyun * full
2776*4882a593Smuzhiyun */
2777*4882a593Smuzhiyun list_del_init(&sa->link);
2778*4882a593Smuzhiyun break;
2779*4882a593Smuzhiyun }
2780*4882a593Smuzhiyun }
2781*4882a593Smuzhiyun }
2782*4882a593Smuzhiyun }
2783*4882a593Smuzhiyun
2784*4882a593Smuzhiyun /* Only if the remaining chunk is smaller than 512 pages; if more
2785*4882a593Smuzhiyun * is left it means we couldn't allocate a 2MB page above, so
2786*4882a593Smuzhiyun * don't retry here.
2787*4882a593Smuzhiyun */
2788*4882a593Smuzhiyun if (nr_left > 0 && nr_left < (SZ_2M / SZ_4K)) {
2789*4882a593Smuzhiyun /* create a new partial and suballocate the rest from it
2790*4882a593Smuzhiyun */
2791*4882a593Smuzhiyun struct page *np = NULL;
2792*4882a593Smuzhiyun
2793*4882a593Smuzhiyun np = kbase_mem_pool_alloc_locked(pool);
2794*4882a593Smuzhiyun
2795*4882a593Smuzhiyun if (np) {
2796*4882a593Smuzhiyun int i;
2797*4882a593Smuzhiyun struct kbase_sub_alloc *const sa = *prealloc_sa;
2798*4882a593Smuzhiyun struct page *p;
2799*4882a593Smuzhiyun
2800*4882a593Smuzhiyun /* store pointers back to the control struct */
2801*4882a593Smuzhiyun np->lru.next = (void *)sa;
2802*4882a593Smuzhiyun for (p = np; p < np + SZ_2M / SZ_4K; p++)
2803*4882a593Smuzhiyun p->lru.prev = (void *)np;
2804*4882a593Smuzhiyun INIT_LIST_HEAD(&sa->link);
2805*4882a593Smuzhiyun bitmap_zero(sa->sub_pages, SZ_2M / SZ_4K);
2806*4882a593Smuzhiyun sa->page = np;
2807*4882a593Smuzhiyun
2808*4882a593Smuzhiyun for (i = 0; i < nr_left; i++)
2809*4882a593Smuzhiyun *tp++ = as_tagged_tag(
2810*4882a593Smuzhiyun page_to_phys(np + i),
2811*4882a593Smuzhiyun FROM_PARTIAL);
2812*4882a593Smuzhiyun
2813*4882a593Smuzhiyun bitmap_set(sa->sub_pages, 0, nr_left);
2814*4882a593Smuzhiyun nr_left = 0;
2815*4882a593Smuzhiyun /* Indicate to the caller that the pre-allocated sub-alloc has
2816*4882a593Smuzhiyun * been consumed and will be freed later.
2817*4882a593Smuzhiyun */
2818*4882a593Smuzhiyun *prealloc_sa = NULL;
2819*4882a593Smuzhiyun
2820*4882a593Smuzhiyun /* expose for later use */
2821*4882a593Smuzhiyun list_add(&sa->link, &kctx->mem_partials);
2822*4882a593Smuzhiyun }
2823*4882a593Smuzhiyun }
2824*4882a593Smuzhiyun if (nr_left)
2825*4882a593Smuzhiyun goto alloc_failed;
2826*4882a593Smuzhiyun } else {
2827*4882a593Smuzhiyun res = kbase_mem_pool_alloc_pages_locked(pool,
2828*4882a593Smuzhiyun nr_left,
2829*4882a593Smuzhiyun tp);
2830*4882a593Smuzhiyun if (res <= 0)
2831*4882a593Smuzhiyun goto alloc_failed;
2832*4882a593Smuzhiyun }
2833*4882a593Smuzhiyun
2834*4882a593Smuzhiyun KBASE_TLSTREAM_AUX_PAGESALLOC(
2835*4882a593Smuzhiyun kbdev,
2836*4882a593Smuzhiyun kctx->id,
2837*4882a593Smuzhiyun (u64)new_page_count);
2838*4882a593Smuzhiyun
2839*4882a593Smuzhiyun alloc->nents += nr_pages_requested;
2840*4882a593Smuzhiyun
2841*4882a593Smuzhiyun kbase_trace_gpu_mem_usage_inc(kctx->kbdev, kctx, nr_pages_requested);
2842*4882a593Smuzhiyun
2843*4882a593Smuzhiyun done:
2844*4882a593Smuzhiyun return new_pages;
2845*4882a593Smuzhiyun
2846*4882a593Smuzhiyun alloc_failed:
2847*4882a593Smuzhiyun /* Rollback is needed if we got one or more 2MB pages but failed later */
2848*4882a593Smuzhiyun if (nr_left != nr_pages_requested) {
2849*4882a593Smuzhiyun size_t nr_pages_to_free = nr_pages_requested - nr_left;
2850*4882a593Smuzhiyun
2851*4882a593Smuzhiyun struct tagged_addr *start_free = alloc->pages + alloc->nents;
2852*4882a593Smuzhiyun
2853*4882a593Smuzhiyun if (kbdev->pagesize_2mb && pool->order) {
2854*4882a593Smuzhiyun while (nr_pages_to_free) {
2855*4882a593Smuzhiyun if (is_huge_head(*start_free)) {
2856*4882a593Smuzhiyun kbase_mem_pool_free_pages_locked(
2857*4882a593Smuzhiyun pool, 512,
2858*4882a593Smuzhiyun start_free,
2859*4882a593Smuzhiyun false, /* not dirty */
2860*4882a593Smuzhiyun true); /* return to pool */
2861*4882a593Smuzhiyun nr_pages_to_free -= 512;
2862*4882a593Smuzhiyun start_free += 512;
2863*4882a593Smuzhiyun } else if (is_partial(*start_free)) {
2864*4882a593Smuzhiyun free_partial_locked(kctx, pool,
2865*4882a593Smuzhiyun *start_free);
2866*4882a593Smuzhiyun nr_pages_to_free--;
2867*4882a593Smuzhiyun start_free++;
2868*4882a593Smuzhiyun }
2869*4882a593Smuzhiyun }
2870*4882a593Smuzhiyun } else {
2871*4882a593Smuzhiyun kbase_mem_pool_free_pages_locked(pool,
2872*4882a593Smuzhiyun nr_pages_to_free,
2873*4882a593Smuzhiyun start_free,
2874*4882a593Smuzhiyun false, /* not dirty */
2875*4882a593Smuzhiyun true); /* return to pool */
2876*4882a593Smuzhiyun }
2877*4882a593Smuzhiyun }
2878*4882a593Smuzhiyun
2879*4882a593Smuzhiyun kbase_process_page_usage_dec(kctx, nr_pages_requested);
2880*4882a593Smuzhiyun atomic_sub(nr_pages_requested, &kctx->used_pages);
2881*4882a593Smuzhiyun atomic_sub(nr_pages_requested, &kctx->kbdev->memdev.used_pages);
2882*4882a593Smuzhiyun
2883*4882a593Smuzhiyun invalid_request:
2884*4882a593Smuzhiyun return NULL;
2885*4882a593Smuzhiyun }
2886*4882a593Smuzhiyun
2887*4882a593Smuzhiyun static void free_partial(struct kbase_context *kctx, int group_id, struct
2888*4882a593Smuzhiyun tagged_addr tp)
2889*4882a593Smuzhiyun {
2890*4882a593Smuzhiyun struct page *p, *head_page;
2891*4882a593Smuzhiyun struct kbase_sub_alloc *sa;
2892*4882a593Smuzhiyun
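	/* p->lru.prev holds the 2MB head page and the head page's lru.next
	 * holds the kbase_sub_alloc, as set up by the allocation helpers when
	 * the partial 2MB page was created.
	 */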
2893*4882a593Smuzhiyun p = as_page(tp);
2894*4882a593Smuzhiyun head_page = (struct page *)p->lru.prev;
2895*4882a593Smuzhiyun sa = (struct kbase_sub_alloc *)head_page->lru.next;
2896*4882a593Smuzhiyun spin_lock(&kctx->mem_partials_lock);
2897*4882a593Smuzhiyun clear_bit(p - head_page, sa->sub_pages);
2898*4882a593Smuzhiyun if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
2899*4882a593Smuzhiyun list_del(&sa->link);
2900*4882a593Smuzhiyun kbase_mem_pool_free(
2901*4882a593Smuzhiyun &kctx->mem_pools.large[group_id],
2902*4882a593Smuzhiyun head_page,
2903*4882a593Smuzhiyun true);
2904*4882a593Smuzhiyun kfree(sa);
2905*4882a593Smuzhiyun } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
2906*4882a593Smuzhiyun SZ_2M / SZ_4K - 1) {
2907*4882a593Smuzhiyun /* expose the partial again */
2908*4882a593Smuzhiyun list_add(&sa->link, &kctx->mem_partials);
2909*4882a593Smuzhiyun }
2910*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
2911*4882a593Smuzhiyun }
2912*4882a593Smuzhiyun
2913*4882a593Smuzhiyun int kbase_free_phy_pages_helper(
2914*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc,
2915*4882a593Smuzhiyun size_t nr_pages_to_free)
2916*4882a593Smuzhiyun {
2917*4882a593Smuzhiyun struct kbase_context *kctx = alloc->imported.native.kctx;
2918*4882a593Smuzhiyun struct kbase_device *kbdev = kctx->kbdev;
2919*4882a593Smuzhiyun bool syncback;
2920*4882a593Smuzhiyun bool reclaimed = (alloc->evicted != 0);
2921*4882a593Smuzhiyun struct tagged_addr *start_free;
2922*4882a593Smuzhiyun int new_page_count __maybe_unused;
2923*4882a593Smuzhiyun size_t freed = 0;
2924*4882a593Smuzhiyun
2925*4882a593Smuzhiyun if (WARN_ON(alloc->type != KBASE_MEM_TYPE_NATIVE) ||
2926*4882a593Smuzhiyun WARN_ON(alloc->imported.native.kctx == NULL) ||
2927*4882a593Smuzhiyun WARN_ON(alloc->nents < nr_pages_to_free) ||
2928*4882a593Smuzhiyun WARN_ON(alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
2929*4882a593Smuzhiyun return -EINVAL;
2930*4882a593Smuzhiyun }
2931*4882a593Smuzhiyun
2932*4882a593Smuzhiyun /* early out if nothing to do */
2933*4882a593Smuzhiyun if (nr_pages_to_free == 0)
2934*4882a593Smuzhiyun return 0;
2935*4882a593Smuzhiyun
2936*4882a593Smuzhiyun start_free = alloc->pages + alloc->nents - nr_pages_to_free;
2937*4882a593Smuzhiyun
2938*4882a593Smuzhiyun syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
2939*4882a593Smuzhiyun
2940*4882a593Smuzhiyun /* pad start_free to a valid start location */
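	/* A 4KB entry that is the tail of a 2MB page cannot be freed on its
	 * own, so advance until start_free points at a 2MB head entry, a
	 * partial entry or a plain small page.
	 */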
2941*4882a593Smuzhiyun while (nr_pages_to_free && is_huge(*start_free) &&
2942*4882a593Smuzhiyun !is_huge_head(*start_free)) {
2943*4882a593Smuzhiyun nr_pages_to_free--;
2944*4882a593Smuzhiyun start_free++;
2945*4882a593Smuzhiyun }
2946*4882a593Smuzhiyun
2947*4882a593Smuzhiyun while (nr_pages_to_free) {
2948*4882a593Smuzhiyun if (is_huge_head(*start_free)) {
2949*4882a593Smuzhiyun /* This is a 2MB entry, so free all the 512 pages that
2950*4882a593Smuzhiyun * it points to
2951*4882a593Smuzhiyun */
2952*4882a593Smuzhiyun kbase_mem_pool_free_pages(
2953*4882a593Smuzhiyun &kctx->mem_pools.large[alloc->group_id],
2954*4882a593Smuzhiyun 512,
2955*4882a593Smuzhiyun start_free,
2956*4882a593Smuzhiyun syncback,
2957*4882a593Smuzhiyun reclaimed);
2958*4882a593Smuzhiyun nr_pages_to_free -= 512;
2959*4882a593Smuzhiyun start_free += 512;
2960*4882a593Smuzhiyun freed += 512;
2961*4882a593Smuzhiyun } else if (is_partial(*start_free)) {
2962*4882a593Smuzhiyun free_partial(kctx, alloc->group_id, *start_free);
2963*4882a593Smuzhiyun nr_pages_to_free--;
2964*4882a593Smuzhiyun start_free++;
2965*4882a593Smuzhiyun freed++;
2966*4882a593Smuzhiyun } else {
2967*4882a593Smuzhiyun struct tagged_addr *local_end_free;
2968*4882a593Smuzhiyun
2969*4882a593Smuzhiyun local_end_free = start_free;
2970*4882a593Smuzhiyun while (nr_pages_to_free &&
2971*4882a593Smuzhiyun !is_huge(*local_end_free) &&
2972*4882a593Smuzhiyun !is_partial(*local_end_free)) {
2973*4882a593Smuzhiyun local_end_free++;
2974*4882a593Smuzhiyun nr_pages_to_free--;
2975*4882a593Smuzhiyun }
2976*4882a593Smuzhiyun kbase_mem_pool_free_pages(
2977*4882a593Smuzhiyun &kctx->mem_pools.small[alloc->group_id],
2978*4882a593Smuzhiyun local_end_free - start_free,
2979*4882a593Smuzhiyun start_free,
2980*4882a593Smuzhiyun syncback,
2981*4882a593Smuzhiyun reclaimed);
2982*4882a593Smuzhiyun freed += local_end_free - start_free;
2983*4882a593Smuzhiyun start_free += local_end_free - start_free;
2984*4882a593Smuzhiyun }
2985*4882a593Smuzhiyun }
2986*4882a593Smuzhiyun
2987*4882a593Smuzhiyun alloc->nents -= freed;
2988*4882a593Smuzhiyun
2989*4882a593Smuzhiyun /*
2990*4882a593Smuzhiyun * If the allocation was not evicted (i.e. evicted == 0) then
2991*4882a593Smuzhiyun * the page accounting needs to be done.
2992*4882a593Smuzhiyun */
2993*4882a593Smuzhiyun if (!reclaimed) {
2994*4882a593Smuzhiyun kbase_process_page_usage_dec(kctx, freed);
2995*4882a593Smuzhiyun new_page_count = atomic_sub_return(freed,
2996*4882a593Smuzhiyun &kctx->used_pages);
2997*4882a593Smuzhiyun atomic_sub(freed,
2998*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
2999*4882a593Smuzhiyun
3000*4882a593Smuzhiyun KBASE_TLSTREAM_AUX_PAGESALLOC(
3001*4882a593Smuzhiyun kbdev,
3002*4882a593Smuzhiyun kctx->id,
3003*4882a593Smuzhiyun (u64)new_page_count);
3004*4882a593Smuzhiyun
3005*4882a593Smuzhiyun kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
3006*4882a593Smuzhiyun }
3007*4882a593Smuzhiyun
3008*4882a593Smuzhiyun return 0;
3009*4882a593Smuzhiyun }
3010*4882a593Smuzhiyun
3011*4882a593Smuzhiyun static void free_partial_locked(struct kbase_context *kctx,
3012*4882a593Smuzhiyun struct kbase_mem_pool *pool, struct tagged_addr tp)
3013*4882a593Smuzhiyun {
3014*4882a593Smuzhiyun struct page *p, *head_page;
3015*4882a593Smuzhiyun struct kbase_sub_alloc *sa;
3016*4882a593Smuzhiyun
3017*4882a593Smuzhiyun lockdep_assert_held(&pool->pool_lock);
3018*4882a593Smuzhiyun lockdep_assert_held(&kctx->mem_partials_lock);
3019*4882a593Smuzhiyun
3020*4882a593Smuzhiyun p = as_page(tp);
3021*4882a593Smuzhiyun head_page = (struct page *)p->lru.prev;
3022*4882a593Smuzhiyun sa = (struct kbase_sub_alloc *)head_page->lru.next;
3023*4882a593Smuzhiyun clear_bit(p - head_page, sa->sub_pages);
3024*4882a593Smuzhiyun if (bitmap_empty(sa->sub_pages, SZ_2M / SZ_4K)) {
3025*4882a593Smuzhiyun list_del(&sa->link);
3026*4882a593Smuzhiyun kbase_mem_pool_free_locked(pool, head_page, true);
3027*4882a593Smuzhiyun kfree(sa);
3028*4882a593Smuzhiyun } else if (bitmap_weight(sa->sub_pages, SZ_2M / SZ_4K) ==
3029*4882a593Smuzhiyun SZ_2M / SZ_4K - 1) {
3030*4882a593Smuzhiyun /* expose the partial again */
3031*4882a593Smuzhiyun list_add(&sa->link, &kctx->mem_partials);
3032*4882a593Smuzhiyun }
3033*4882a593Smuzhiyun }
3034*4882a593Smuzhiyun
3035*4882a593Smuzhiyun void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
3036*4882a593Smuzhiyun struct kbase_mem_pool *pool, struct tagged_addr *pages,
3037*4882a593Smuzhiyun size_t nr_pages_to_free)
3038*4882a593Smuzhiyun {
3039*4882a593Smuzhiyun struct kbase_context *kctx = alloc->imported.native.kctx;
3040*4882a593Smuzhiyun struct kbase_device *kbdev = kctx->kbdev;
3041*4882a593Smuzhiyun bool syncback;
3042*4882a593Smuzhiyun bool reclaimed = (alloc->evicted != 0);
3043*4882a593Smuzhiyun struct tagged_addr *start_free;
3044*4882a593Smuzhiyun size_t freed = 0;
3045*4882a593Smuzhiyun
3046*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_NATIVE);
3047*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->imported.native.kctx);
3048*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->nents >= nr_pages_to_free);
3049*4882a593Smuzhiyun
3050*4882a593Smuzhiyun lockdep_assert_held(&pool->pool_lock);
3051*4882a593Smuzhiyun lockdep_assert_held(&kctx->mem_partials_lock);
3052*4882a593Smuzhiyun
3053*4882a593Smuzhiyun /* early out if nothing to do */
3054*4882a593Smuzhiyun if (!nr_pages_to_free)
3055*4882a593Smuzhiyun return;
3056*4882a593Smuzhiyun
3057*4882a593Smuzhiyun start_free = pages;
3058*4882a593Smuzhiyun
3059*4882a593Smuzhiyun syncback = alloc->properties & KBASE_MEM_PHY_ALLOC_ACCESSED_CACHED;
3060*4882a593Smuzhiyun
3061*4882a593Smuzhiyun /* pad start_free to a valid start location */
3062*4882a593Smuzhiyun while (nr_pages_to_free && is_huge(*start_free) &&
3063*4882a593Smuzhiyun !is_huge_head(*start_free)) {
3064*4882a593Smuzhiyun nr_pages_to_free--;
3065*4882a593Smuzhiyun start_free++;
3066*4882a593Smuzhiyun }
3067*4882a593Smuzhiyun
3068*4882a593Smuzhiyun while (nr_pages_to_free) {
3069*4882a593Smuzhiyun if (is_huge_head(*start_free)) {
3070*4882a593Smuzhiyun /* This is a 2MB entry, so free all the 512 pages that
3071*4882a593Smuzhiyun * it points to
3072*4882a593Smuzhiyun */
3073*4882a593Smuzhiyun WARN_ON(!pool->order);
3074*4882a593Smuzhiyun kbase_mem_pool_free_pages_locked(pool,
3075*4882a593Smuzhiyun 512,
3076*4882a593Smuzhiyun start_free,
3077*4882a593Smuzhiyun syncback,
3078*4882a593Smuzhiyun reclaimed);
3079*4882a593Smuzhiyun nr_pages_to_free -= 512;
3080*4882a593Smuzhiyun start_free += 512;
3081*4882a593Smuzhiyun freed += 512;
3082*4882a593Smuzhiyun } else if (is_partial(*start_free)) {
3083*4882a593Smuzhiyun WARN_ON(!pool->order);
3084*4882a593Smuzhiyun free_partial_locked(kctx, pool, *start_free);
3085*4882a593Smuzhiyun nr_pages_to_free--;
3086*4882a593Smuzhiyun start_free++;
3087*4882a593Smuzhiyun freed++;
3088*4882a593Smuzhiyun } else {
3089*4882a593Smuzhiyun struct tagged_addr *local_end_free;
3090*4882a593Smuzhiyun
3091*4882a593Smuzhiyun WARN_ON(pool->order);
3092*4882a593Smuzhiyun local_end_free = start_free;
3093*4882a593Smuzhiyun while (nr_pages_to_free &&
3094*4882a593Smuzhiyun !is_huge(*local_end_free) &&
3095*4882a593Smuzhiyun !is_partial(*local_end_free)) {
3096*4882a593Smuzhiyun local_end_free++;
3097*4882a593Smuzhiyun nr_pages_to_free--;
3098*4882a593Smuzhiyun }
3099*4882a593Smuzhiyun kbase_mem_pool_free_pages_locked(pool,
3100*4882a593Smuzhiyun local_end_free - start_free,
3101*4882a593Smuzhiyun start_free,
3102*4882a593Smuzhiyun syncback,
3103*4882a593Smuzhiyun reclaimed);
3104*4882a593Smuzhiyun freed += local_end_free - start_free;
3105*4882a593Smuzhiyun start_free += local_end_free - start_free;
3106*4882a593Smuzhiyun }
3107*4882a593Smuzhiyun }
3108*4882a593Smuzhiyun
3109*4882a593Smuzhiyun alloc->nents -= freed;
3110*4882a593Smuzhiyun
3111*4882a593Smuzhiyun /*
3112*4882a593Smuzhiyun * If the allocation was not evicted (i.e. evicted == 0) then
3113*4882a593Smuzhiyun * the page accounting needs to be done.
3114*4882a593Smuzhiyun */
3115*4882a593Smuzhiyun if (!reclaimed) {
3116*4882a593Smuzhiyun int new_page_count;
3117*4882a593Smuzhiyun
3118*4882a593Smuzhiyun kbase_process_page_usage_dec(kctx, freed);
3119*4882a593Smuzhiyun new_page_count = atomic_sub_return(freed,
3120*4882a593Smuzhiyun &kctx->used_pages);
3121*4882a593Smuzhiyun atomic_sub(freed,
3122*4882a593Smuzhiyun &kctx->kbdev->memdev.used_pages);
3123*4882a593Smuzhiyun
3124*4882a593Smuzhiyun KBASE_TLSTREAM_AUX_PAGESALLOC(
3125*4882a593Smuzhiyun kbdev,
3126*4882a593Smuzhiyun kctx->id,
3127*4882a593Smuzhiyun (u64)new_page_count);
3128*4882a593Smuzhiyun
3129*4882a593Smuzhiyun kbase_trace_gpu_mem_usage_dec(kctx->kbdev, kctx, freed);
3130*4882a593Smuzhiyun }
3131*4882a593Smuzhiyun }
3132*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper_locked);
3133*4882a593Smuzhiyun
3134*4882a593Smuzhiyun #if MALI_USE_CSF
3135*4882a593Smuzhiyun /**
3136*4882a593Smuzhiyun * kbase_jd_user_buf_unpin_pages - Release the pinned pages of a user buffer.
3137*4882a593Smuzhiyun * @alloc: The allocation for the imported user buffer.
3138*4882a593Smuzhiyun *
3139*4882a593Smuzhiyun * This must only be called when terminating an alloc, when its refcount
3140*4882a593Smuzhiyun * (number of users) has become 0. This also ensures it is only called once all
3141*4882a593Smuzhiyun * CPU mappings have been closed.
3142*4882a593Smuzhiyun *
3143*4882a593Smuzhiyun * Call kbase_jd_user_buf_unmap() instead if you need to unpin pages on
3144*4882a593Smuzhiyun * active allocations.
3145*4882a593Smuzhiyun */
3146*4882a593Smuzhiyun static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc);
3147*4882a593Smuzhiyun #endif
3148*4882a593Smuzhiyun
3149*4882a593Smuzhiyun void kbase_mem_kref_free(struct kref *kref)
3150*4882a593Smuzhiyun {
3151*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc;
3152*4882a593Smuzhiyun
3153*4882a593Smuzhiyun alloc = container_of(kref, struct kbase_mem_phy_alloc, kref);
3154*4882a593Smuzhiyun
3155*4882a593Smuzhiyun switch (alloc->type) {
3156*4882a593Smuzhiyun case KBASE_MEM_TYPE_NATIVE: {
3157*4882a593Smuzhiyun
3158*4882a593Smuzhiyun if (!WARN_ON(!alloc->imported.native.kctx)) {
3159*4882a593Smuzhiyun if (alloc->permanent_map)
3160*4882a593Smuzhiyun kbase_phy_alloc_mapping_term(
3161*4882a593Smuzhiyun alloc->imported.native.kctx,
3162*4882a593Smuzhiyun alloc);
3163*4882a593Smuzhiyun
3164*4882a593Smuzhiyun /*
3165*4882a593Smuzhiyun * The physical allocation must have been removed from
3166*4882a593Smuzhiyun * the eviction list before trying to free it.
3167*4882a593Smuzhiyun */
3168*4882a593Smuzhiyun mutex_lock(
3169*4882a593Smuzhiyun &alloc->imported.native.kctx->jit_evict_lock);
3170*4882a593Smuzhiyun WARN_ON(!list_empty(&alloc->evict_node));
3171*4882a593Smuzhiyun mutex_unlock(
3172*4882a593Smuzhiyun &alloc->imported.native.kctx->jit_evict_lock);
3173*4882a593Smuzhiyun
3174*4882a593Smuzhiyun kbase_process_page_usage_dec(
3175*4882a593Smuzhiyun alloc->imported.native.kctx,
3176*4882a593Smuzhiyun alloc->imported.native.nr_struct_pages);
3177*4882a593Smuzhiyun }
3178*4882a593Smuzhiyun kbase_free_phy_pages_helper(alloc, alloc->nents);
3179*4882a593Smuzhiyun break;
3180*4882a593Smuzhiyun }
3181*4882a593Smuzhiyun case KBASE_MEM_TYPE_ALIAS: {
3182*4882a593Smuzhiyun /* just call put on the underlying phy allocs */
3183*4882a593Smuzhiyun size_t i;
3184*4882a593Smuzhiyun struct kbase_aliased *aliased;
3185*4882a593Smuzhiyun
3186*4882a593Smuzhiyun aliased = alloc->imported.alias.aliased;
3187*4882a593Smuzhiyun if (aliased) {
3188*4882a593Smuzhiyun for (i = 0; i < alloc->imported.alias.nents; i++)
3189*4882a593Smuzhiyun if (aliased[i].alloc) {
3190*4882a593Smuzhiyun kbase_mem_phy_alloc_gpu_unmapped(aliased[i].alloc);
3191*4882a593Smuzhiyun kbase_mem_phy_alloc_put(aliased[i].alloc);
3192*4882a593Smuzhiyun }
3193*4882a593Smuzhiyun vfree(aliased);
3194*4882a593Smuzhiyun }
3195*4882a593Smuzhiyun break;
3196*4882a593Smuzhiyun }
3197*4882a593Smuzhiyun case KBASE_MEM_TYPE_RAW:
3198*4882a593Smuzhiyun /* raw pages, external cleanup */
3199*4882a593Smuzhiyun break;
3200*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_UMM:
3201*4882a593Smuzhiyun if (!IS_ENABLED(CONFIG_MALI_DMA_BUF_MAP_ON_DEMAND)) {
3202*4882a593Smuzhiyun WARN_ONCE(alloc->imported.umm.current_mapping_usage_count != 1,
3203*4882a593Smuzhiyun "WARNING: expected exactly 1 mapping, got %d",
3204*4882a593Smuzhiyun alloc->imported.umm.current_mapping_usage_count);
3205*4882a593Smuzhiyun dma_buf_unmap_attachment(
3206*4882a593Smuzhiyun alloc->imported.umm.dma_attachment,
3207*4882a593Smuzhiyun alloc->imported.umm.sgt,
3208*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
3209*4882a593Smuzhiyun kbase_remove_dma_buf_usage(alloc->imported.umm.kctx,
3210*4882a593Smuzhiyun alloc);
3211*4882a593Smuzhiyun }
3212*4882a593Smuzhiyun dma_buf_detach(alloc->imported.umm.dma_buf,
3213*4882a593Smuzhiyun alloc->imported.umm.dma_attachment);
3214*4882a593Smuzhiyun dma_buf_put(alloc->imported.umm.dma_buf);
3215*4882a593Smuzhiyun break;
3216*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_USER_BUF:
3217*4882a593Smuzhiyun #if MALI_USE_CSF
3218*4882a593Smuzhiyun kbase_jd_user_buf_unpin_pages(alloc);
3219*4882a593Smuzhiyun #endif
3220*4882a593Smuzhiyun if (alloc->imported.user_buf.mm)
3221*4882a593Smuzhiyun mmdrop(alloc->imported.user_buf.mm);
3222*4882a593Smuzhiyun if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
3223*4882a593Smuzhiyun vfree(alloc->imported.user_buf.pages);
3224*4882a593Smuzhiyun else
3225*4882a593Smuzhiyun kfree(alloc->imported.user_buf.pages);
3226*4882a593Smuzhiyun break;
3227*4882a593Smuzhiyun default:
3228*4882a593Smuzhiyun WARN(1, "Unexpected free of type %d\n", alloc->type);
3229*4882a593Smuzhiyun break;
3230*4882a593Smuzhiyun }
3231*4882a593Smuzhiyun
3232*4882a593Smuzhiyun /* Free based on allocation type */
3233*4882a593Smuzhiyun if (alloc->properties & KBASE_MEM_PHY_ALLOC_LARGE)
3234*4882a593Smuzhiyun vfree(alloc);
3235*4882a593Smuzhiyun else
3236*4882a593Smuzhiyun kfree(alloc);
3237*4882a593Smuzhiyun }
3238*4882a593Smuzhiyun
3239*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_mem_kref_free);
3240*4882a593Smuzhiyun
3241*4882a593Smuzhiyun int kbase_alloc_phy_pages(struct kbase_va_region *reg, size_t vsize, size_t size)
3242*4882a593Smuzhiyun {
3243*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(reg != NULL);
3244*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(vsize > 0);
3245*4882a593Smuzhiyun
3246*4882a593Smuzhiyun /* validate user provided arguments */
3247*4882a593Smuzhiyun if (size > vsize || vsize > reg->nr_pages)
3248*4882a593Smuzhiyun goto out_term;
3249*4882a593Smuzhiyun
3250*4882a593Smuzhiyun /* Prevent vsize*sizeof from wrapping around.
3251*4882a593Smuzhiyun * For instance, if vsize is 2**29+1, we'll allocate 1 byte and the alloc won't fail.
3252*4882a593Smuzhiyun */
3253*4882a593Smuzhiyun if ((size_t) vsize > ((size_t) -1 / sizeof(*reg->cpu_alloc->pages)))
3254*4882a593Smuzhiyun goto out_term;
3255*4882a593Smuzhiyun
3256*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(vsize != 0);
3257*4882a593Smuzhiyun
3258*4882a593Smuzhiyun if (kbase_alloc_phy_pages_helper(reg->cpu_alloc, size) != 0)
3259*4882a593Smuzhiyun goto out_term;
3260*4882a593Smuzhiyun
3261*4882a593Smuzhiyun reg->cpu_alloc->reg = reg;
3262*4882a593Smuzhiyun if (reg->cpu_alloc != reg->gpu_alloc) {
3263*4882a593Smuzhiyun if (kbase_alloc_phy_pages_helper(reg->gpu_alloc, size) != 0)
3264*4882a593Smuzhiyun goto out_rollback;
3265*4882a593Smuzhiyun reg->gpu_alloc->reg = reg;
3266*4882a593Smuzhiyun }
3267*4882a593Smuzhiyun
3268*4882a593Smuzhiyun return 0;
3269*4882a593Smuzhiyun
3270*4882a593Smuzhiyun out_rollback:
3271*4882a593Smuzhiyun kbase_free_phy_pages_helper(reg->cpu_alloc, size);
3272*4882a593Smuzhiyun out_term:
3273*4882a593Smuzhiyun return -1;
3274*4882a593Smuzhiyun }
3275*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages);
3276*4882a593Smuzhiyun
3277*4882a593Smuzhiyun void kbase_set_phy_alloc_page_status(struct kbase_mem_phy_alloc *alloc,
3278*4882a593Smuzhiyun enum kbase_page_status status)
3279*4882a593Smuzhiyun {
3280*4882a593Smuzhiyun u32 i = 0;
3281*4882a593Smuzhiyun
3282*4882a593Smuzhiyun for (; i < alloc->nents; i++) {
3283*4882a593Smuzhiyun struct tagged_addr phys = alloc->pages[i];
3284*4882a593Smuzhiyun struct kbase_page_metadata *page_md = kbase_page_private(as_page(phys));
3285*4882a593Smuzhiyun
3286*4882a593Smuzhiyun /* Skip the 4KB page that is part of a large page, as the large page is
3287*4882a593Smuzhiyun * excluded from the migration process.
3288*4882a593Smuzhiyun */
3289*4882a593Smuzhiyun if (is_huge(phys) || is_partial(phys))
3290*4882a593Smuzhiyun continue;
3291*4882a593Smuzhiyun
3292*4882a593Smuzhiyun if (!page_md)
3293*4882a593Smuzhiyun continue;
3294*4882a593Smuzhiyun
3295*4882a593Smuzhiyun spin_lock(&page_md->migrate_lock);
3296*4882a593Smuzhiyun page_md->status = PAGE_STATUS_SET(page_md->status, (u8)status);
3297*4882a593Smuzhiyun spin_unlock(&page_md->migrate_lock);
3298*4882a593Smuzhiyun }
3299*4882a593Smuzhiyun }
3300*4882a593Smuzhiyun
3301*4882a593Smuzhiyun bool kbase_check_alloc_flags(unsigned long flags)
3302*4882a593Smuzhiyun {
3303*4882a593Smuzhiyun /* Only known input flags should be set. */
3304*4882a593Smuzhiyun if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
3305*4882a593Smuzhiyun return false;
3306*4882a593Smuzhiyun
3307*4882a593Smuzhiyun /* At least one flag should be set */
3308*4882a593Smuzhiyun if (flags == 0)
3309*4882a593Smuzhiyun return false;
3310*4882a593Smuzhiyun
3311*4882a593Smuzhiyun /* Either the GPU or CPU must be reading from the allocated memory */
3312*4882a593Smuzhiyun if ((flags & (BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD)) == 0)
3313*4882a593Smuzhiyun return false;
3314*4882a593Smuzhiyun
3315*4882a593Smuzhiyun /* Either the GPU or CPU must be writing to the allocated memory */
3316*4882a593Smuzhiyun if ((flags & (BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_WR)) == 0)
3317*4882a593Smuzhiyun return false;
3318*4882a593Smuzhiyun
3319*4882a593Smuzhiyun /* GPU executable memory cannot:
3320*4882a593Smuzhiyun * - Be written by the GPU
3321*4882a593Smuzhiyun * - Be grown on GPU page fault
3322*4882a593Smuzhiyun */
3323*4882a593Smuzhiyun if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
3324*4882a593Smuzhiyun (BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF)))
3325*4882a593Smuzhiyun return false;
3326*4882a593Smuzhiyun
3327*4882a593Smuzhiyun #if !MALI_USE_CSF
3328*4882a593Smuzhiyun /* GPU executable memory also cannot have the top of its initial
3329*4882a593Smuzhiyun * commit aligned to 'extension'
3330*4882a593Smuzhiyun */
3331*4882a593Smuzhiyun if ((flags & BASE_MEM_PROT_GPU_EX) && (flags &
3332*4882a593Smuzhiyun BASE_MEM_TILER_ALIGN_TOP))
3333*4882a593Smuzhiyun return false;
3334*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3335*4882a593Smuzhiyun
3336*4882a593Smuzhiyun /* To have an allocation lie within a 4GB chunk is required only for
3337*4882a593Smuzhiyun * TLS memory, which will never be used to contain executable code.
3338*4882a593Smuzhiyun */
3339*4882a593Smuzhiyun if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
3340*4882a593Smuzhiyun BASE_MEM_PROT_GPU_EX))
3341*4882a593Smuzhiyun return false;
3342*4882a593Smuzhiyun
3343*4882a593Smuzhiyun #if !MALI_USE_CSF
3344*4882a593Smuzhiyun /* TLS memory should also not be used for tiler heap */
3345*4882a593Smuzhiyun if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) && (flags &
3346*4882a593Smuzhiyun BASE_MEM_TILER_ALIGN_TOP))
3347*4882a593Smuzhiyun return false;
3348*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3349*4882a593Smuzhiyun
3350*4882a593Smuzhiyun /* GPU should have at least read or write access otherwise there is no
3351*4882a593Smuzhiyun * reason for allocating.
3352*4882a593Smuzhiyun */
3353*4882a593Smuzhiyun if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
3354*4882a593Smuzhiyun return false;
3355*4882a593Smuzhiyun
3356*4882a593Smuzhiyun /* BASE_MEM_IMPORT_SHARED is only valid for imported memory */
3357*4882a593Smuzhiyun if ((flags & BASE_MEM_IMPORT_SHARED) == BASE_MEM_IMPORT_SHARED)
3358*4882a593Smuzhiyun return false;
3359*4882a593Smuzhiyun
3360*4882a593Smuzhiyun /* BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP is only valid for imported memory
3361*4882a593Smuzhiyun */
3362*4882a593Smuzhiyun if ((flags & BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP) ==
3363*4882a593Smuzhiyun BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP)
3364*4882a593Smuzhiyun return false;
3365*4882a593Smuzhiyun
3366*4882a593Smuzhiyun /* Should not combine BASE_MEM_COHERENT_LOCAL with
3367*4882a593Smuzhiyun * BASE_MEM_COHERENT_SYSTEM
3368*4882a593Smuzhiyun */
3369*4882a593Smuzhiyun if ((flags & (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM)) ==
3370*4882a593Smuzhiyun (BASE_MEM_COHERENT_LOCAL | BASE_MEM_COHERENT_SYSTEM))
3371*4882a593Smuzhiyun return false;
3372*4882a593Smuzhiyun
3373*4882a593Smuzhiyun #if MALI_USE_CSF
3374*4882a593Smuzhiyun if ((flags & BASE_MEM_SAME_VA) && (flags & (BASE_MEM_FIXABLE | BASE_MEM_FIXED)))
3375*4882a593Smuzhiyun return false;
3376*4882a593Smuzhiyun
3377*4882a593Smuzhiyun if ((flags & BASE_MEM_FIXABLE) && (flags & BASE_MEM_FIXED))
3378*4882a593Smuzhiyun return false;
3379*4882a593Smuzhiyun #endif
3380*4882a593Smuzhiyun
3381*4882a593Smuzhiyun return true;
3382*4882a593Smuzhiyun }
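
/*
 * Illustrative sketch (not part of the driver): two flag words evaluated
 * against the rules enforced by kbase_check_alloc_flags() above. The flag
 * names are the ones the checks reference; the helper itself is hypothetical
 * and unused.
 */
static inline bool example_alloc_flags_sketch(void)
{
	/* Readable and writable by the GPU, writable by the CPU and growable
	 * on GPU page fault: satisfies every check above.
	 */
	const unsigned long ok_flags = BASE_MEM_PROT_CPU_WR | BASE_MEM_PROT_GPU_RD |
				       BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF;
	/* GPU-executable memory may not also be GPU-writable, so this
	 * combination is rejected.
	 */
	const unsigned long bad_flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
					BASE_MEM_PROT_GPU_EX | BASE_MEM_PROT_GPU_WR;

	return kbase_check_alloc_flags(ok_flags) && !kbase_check_alloc_flags(bad_flags);
}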
3383*4882a593Smuzhiyun
3384*4882a593Smuzhiyun bool kbase_check_import_flags(unsigned long flags)
3385*4882a593Smuzhiyun {
3386*4882a593Smuzhiyun /* Only known input flags should be set. */
3387*4882a593Smuzhiyun if (flags & ~BASE_MEM_FLAGS_INPUT_MASK)
3388*4882a593Smuzhiyun return false;
3389*4882a593Smuzhiyun
3390*4882a593Smuzhiyun /* At least one flag should be set */
3391*4882a593Smuzhiyun if (flags == 0)
3392*4882a593Smuzhiyun return false;
3393*4882a593Smuzhiyun
3394*4882a593Smuzhiyun /* Imported memory cannot be GPU executable */
3395*4882a593Smuzhiyun if (flags & BASE_MEM_PROT_GPU_EX)
3396*4882a593Smuzhiyun return false;
3397*4882a593Smuzhiyun
3398*4882a593Smuzhiyun /* Imported memory cannot grow on page fault */
3399*4882a593Smuzhiyun if (flags & BASE_MEM_GROW_ON_GPF)
3400*4882a593Smuzhiyun return false;
3401*4882a593Smuzhiyun
3402*4882a593Smuzhiyun #if MALI_USE_CSF
3403*4882a593Smuzhiyun /* Imported memory cannot be fixed */
3404*4882a593Smuzhiyun if ((flags & (BASE_MEM_FIXED | BASE_MEM_FIXABLE)))
3405*4882a593Smuzhiyun return false;
3406*4882a593Smuzhiyun #else
3407*4882a593Smuzhiyun /* Imported memory cannot be aligned to the end of its initial commit */
3408*4882a593Smuzhiyun if (flags & BASE_MEM_TILER_ALIGN_TOP)
3409*4882a593Smuzhiyun return false;
3410*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3411*4882a593Smuzhiyun
3412*4882a593Smuzhiyun /* GPU should have at least read or write access otherwise there is no
3413*4882a593Smuzhiyun * reason for importing.
3414*4882a593Smuzhiyun */
3415*4882a593Smuzhiyun if ((flags & (BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR)) == 0)
3416*4882a593Smuzhiyun return false;
3417*4882a593Smuzhiyun
3418*4882a593Smuzhiyun /* Protected memory cannot be read by the CPU */
3419*4882a593Smuzhiyun if ((flags & BASE_MEM_PROTECTED) && (flags & BASE_MEM_PROT_CPU_RD))
3420*4882a593Smuzhiyun return false;
3421*4882a593Smuzhiyun
3422*4882a593Smuzhiyun return true;
3423*4882a593Smuzhiyun }
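
/*
 * Illustrative sketch (not part of the driver): a flag word that passes
 * kbase_check_import_flags() above - readable and writable by the GPU and
 * readable by the CPU, with none of the executable, growable, fixed/fixable
 * or tiler-align-top bits set. The helper is hypothetical and unused.
 */
static inline bool example_import_flags_sketch(void)
{
	const unsigned long import_flags =
		BASE_MEM_PROT_GPU_RD | BASE_MEM_PROT_GPU_WR | BASE_MEM_PROT_CPU_RD;

	return kbase_check_import_flags(import_flags);
}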
3424*4882a593Smuzhiyun
3425*4882a593Smuzhiyun int kbase_check_alloc_sizes(struct kbase_context *kctx, unsigned long flags,
3426*4882a593Smuzhiyun u64 va_pages, u64 commit_pages, u64 large_extension)
3427*4882a593Smuzhiyun {
3428*4882a593Smuzhiyun struct device *dev = kctx->kbdev->dev;
3429*4882a593Smuzhiyun int gpu_pc_bits = kctx->kbdev->gpu_props.props.core_props.log2_program_counter_size;
3430*4882a593Smuzhiyun u64 gpu_pc_pages_max = 1ULL << gpu_pc_bits >> PAGE_SHIFT;
3431*4882a593Smuzhiyun struct kbase_va_region test_reg;
3432*4882a593Smuzhiyun
3433*4882a593Smuzhiyun /* kbase_va_region's extension member can be of variable size, so check against that type */
3434*4882a593Smuzhiyun test_reg.extension = large_extension;
3435*4882a593Smuzhiyun
3436*4882a593Smuzhiyun #define KBASE_MSG_PRE "GPU allocation attempted with "
3437*4882a593Smuzhiyun
3438*4882a593Smuzhiyun if (va_pages == 0) {
3439*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE "0 va_pages!");
3440*4882a593Smuzhiyun return -EINVAL;
3441*4882a593Smuzhiyun }
3442*4882a593Smuzhiyun
3443*4882a593Smuzhiyun if (va_pages > KBASE_MEM_ALLOC_MAX_SIZE) {
3444*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE "va_pages==%lld larger than KBASE_MEM_ALLOC_MAX_SIZE!",
3445*4882a593Smuzhiyun (unsigned long long)va_pages);
3446*4882a593Smuzhiyun return -ENOMEM;
3447*4882a593Smuzhiyun }
3448*4882a593Smuzhiyun
3449*4882a593Smuzhiyun /* Note: commit_pages is checked against va_pages during
3450*4882a593Smuzhiyun * kbase_alloc_phy_pages()
3451*4882a593Smuzhiyun */
3452*4882a593Smuzhiyun
3453*4882a593Smuzhiyun /* Limit GPU executable allocs to GPU PC size */
3454*4882a593Smuzhiyun if ((flags & BASE_MEM_PROT_GPU_EX) && (va_pages > gpu_pc_pages_max)) {
3455*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_PROT_GPU_EX and va_pages==%lld larger than GPU PC range %lld",
3456*4882a593Smuzhiyun (unsigned long long)va_pages,
3457*4882a593Smuzhiyun (unsigned long long)gpu_pc_pages_max);
3458*4882a593Smuzhiyun
3459*4882a593Smuzhiyun return -EINVAL;
3460*4882a593Smuzhiyun }
3461*4882a593Smuzhiyun
3462*4882a593Smuzhiyun if ((flags & BASE_MEM_GROW_ON_GPF) && (test_reg.extension == 0)) {
3463*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE
3464*4882a593Smuzhiyun "BASE_MEM_GROW_ON_GPF but extension == 0\n");
3465*4882a593Smuzhiyun return -EINVAL;
3466*4882a593Smuzhiyun }
3467*4882a593Smuzhiyun
3468*4882a593Smuzhiyun #if !MALI_USE_CSF
3469*4882a593Smuzhiyun if ((flags & BASE_MEM_TILER_ALIGN_TOP) && (test_reg.extension == 0)) {
3470*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE
3471*4882a593Smuzhiyun "BASE_MEM_TILER_ALIGN_TOP but extension == 0\n");
3472*4882a593Smuzhiyun return -EINVAL;
3473*4882a593Smuzhiyun }
3474*4882a593Smuzhiyun
3475*4882a593Smuzhiyun if (!(flags & (BASE_MEM_GROW_ON_GPF | BASE_MEM_TILER_ALIGN_TOP)) &&
3476*4882a593Smuzhiyun test_reg.extension != 0) {
3477*4882a593Smuzhiyun dev_warn(
3478*4882a593Smuzhiyun dev, KBASE_MSG_PRE
3479*4882a593Smuzhiyun "neither BASE_MEM_GROW_ON_GPF nor BASE_MEM_TILER_ALIGN_TOP set but extension != 0\n");
3480*4882a593Smuzhiyun return -EINVAL;
3481*4882a593Smuzhiyun }
3482*4882a593Smuzhiyun #else
3483*4882a593Smuzhiyun if (!(flags & BASE_MEM_GROW_ON_GPF) && test_reg.extension != 0) {
3484*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE
3485*4882a593Smuzhiyun "BASE_MEM_GROW_ON_GPF not set but extension != 0\n");
3486*4882a593Smuzhiyun return -EINVAL;
3487*4882a593Smuzhiyun }
3488*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3489*4882a593Smuzhiyun
3490*4882a593Smuzhiyun #if !MALI_USE_CSF
3491*4882a593Smuzhiyun /* BASE_MEM_TILER_ALIGN_TOP memory has a number of restrictions */
3492*4882a593Smuzhiyun if (flags & BASE_MEM_TILER_ALIGN_TOP) {
3493*4882a593Smuzhiyun #define KBASE_MSG_PRE_FLAG KBASE_MSG_PRE "BASE_MEM_TILER_ALIGN_TOP and "
3494*4882a593Smuzhiyun unsigned long small_extension;
3495*4882a593Smuzhiyun
3496*4882a593Smuzhiyun if (large_extension >
3497*4882a593Smuzhiyun BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES) {
3498*4882a593Smuzhiyun dev_warn(dev,
3499*4882a593Smuzhiyun KBASE_MSG_PRE_FLAG
3500*4882a593Smuzhiyun "extension==%lld pages exceeds limit %lld",
3501*4882a593Smuzhiyun (unsigned long long)large_extension,
3502*4882a593Smuzhiyun BASE_MEM_TILER_ALIGN_TOP_EXTENSION_MAX_PAGES);
3503*4882a593Smuzhiyun return -EINVAL;
3504*4882a593Smuzhiyun }
3505*4882a593Smuzhiyun /* For use with is_power_of_2, which takes unsigned long, so
3506*4882a593Smuzhiyun * must ensure e.g. on 32-bit kernel it'll fit in that type
3507*4882a593Smuzhiyun */
3508*4882a593Smuzhiyun small_extension = (unsigned long)large_extension;
3509*4882a593Smuzhiyun
3510*4882a593Smuzhiyun if (!is_power_of_2(small_extension)) {
3511*4882a593Smuzhiyun dev_warn(dev,
3512*4882a593Smuzhiyun KBASE_MSG_PRE_FLAG
3513*4882a593Smuzhiyun "extension==%ld not a non-zero power of 2",
3514*4882a593Smuzhiyun small_extension);
3515*4882a593Smuzhiyun return -EINVAL;
3516*4882a593Smuzhiyun }
3517*4882a593Smuzhiyun
3518*4882a593Smuzhiyun if (commit_pages > large_extension) {
3519*4882a593Smuzhiyun dev_warn(dev,
3520*4882a593Smuzhiyun KBASE_MSG_PRE_FLAG
3521*4882a593Smuzhiyun "commit_pages==%ld exceeds extension==%ld",
3522*4882a593Smuzhiyun (unsigned long)commit_pages,
3523*4882a593Smuzhiyun (unsigned long)large_extension);
3524*4882a593Smuzhiyun return -EINVAL;
3525*4882a593Smuzhiyun }
3526*4882a593Smuzhiyun #undef KBASE_MSG_PRE_FLAG
3527*4882a593Smuzhiyun }
3528*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3529*4882a593Smuzhiyun
3530*4882a593Smuzhiyun if ((flags & BASE_MEM_GPU_VA_SAME_4GB_PAGE) &&
3531*4882a593Smuzhiyun (va_pages > (BASE_MEM_PFN_MASK_4GB + 1))) {
3532*4882a593Smuzhiyun dev_warn(dev, KBASE_MSG_PRE "BASE_MEM_GPU_VA_SAME_4GB_PAGE and va_pages==%lld greater than that needed for 4GB space",
3533*4882a593Smuzhiyun (unsigned long long)va_pages);
3534*4882a593Smuzhiyun return -EINVAL;
3535*4882a593Smuzhiyun }
3536*4882a593Smuzhiyun
3537*4882a593Smuzhiyun return 0;
3538*4882a593Smuzhiyun #undef KBASE_MSG_PRE
3539*4882a593Smuzhiyun }
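
/*
 * Worked example (sketch, not driver code) of the executable-region limit
 * applied in kbase_check_alloc_sizes() above: with an illustrative
 * log2_program_counter_size of 24 and 4 KiB pages, gpu_pc_pages_max comes
 * out as (1 << 24) >> 12 == 4096 pages, so any BASE_MEM_PROT_GPU_EX
 * allocation larger than that is rejected.
 */
static inline u64 example_gpu_pc_pages_max_sketch(void)
{
	const int log2_pc_size = 24;	/* illustrative value only */

	return (1ULL << log2_pc_size) >> PAGE_SHIFT;
}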
3540*4882a593Smuzhiyun
3541*4882a593Smuzhiyun void kbase_gpu_vm_lock(struct kbase_context *kctx)
3542*4882a593Smuzhiyun {
3543*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
3544*4882a593Smuzhiyun mutex_lock(&kctx->reg_lock);
3545*4882a593Smuzhiyun }
3546*4882a593Smuzhiyun
3547*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_gpu_vm_lock);
3548*4882a593Smuzhiyun
3549*4882a593Smuzhiyun void kbase_gpu_vm_unlock(struct kbase_context *kctx)
3550*4882a593Smuzhiyun {
3551*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(kctx != NULL);
3552*4882a593Smuzhiyun mutex_unlock(&kctx->reg_lock);
3553*4882a593Smuzhiyun }
3554*4882a593Smuzhiyun
3555*4882a593Smuzhiyun KBASE_EXPORT_TEST_API(kbase_gpu_vm_unlock);
3556*4882a593Smuzhiyun
3557*4882a593Smuzhiyun #if IS_ENABLED(CONFIG_DEBUG_FS)
3558*4882a593Smuzhiyun struct kbase_jit_debugfs_data {
3559*4882a593Smuzhiyun int (*func)(struct kbase_jit_debugfs_data *data);
3560*4882a593Smuzhiyun struct mutex lock;
3561*4882a593Smuzhiyun struct kbase_context *kctx;
3562*4882a593Smuzhiyun u64 active_value;
3563*4882a593Smuzhiyun u64 pool_value;
3564*4882a593Smuzhiyun u64 destroy_value;
3565*4882a593Smuzhiyun char buffer[50];
3566*4882a593Smuzhiyun };
3567*4882a593Smuzhiyun
3568*4882a593Smuzhiyun static int kbase_jit_debugfs_common_open(struct inode *inode,
3569*4882a593Smuzhiyun struct file *file, int (*func)(struct kbase_jit_debugfs_data *))
3570*4882a593Smuzhiyun {
3571*4882a593Smuzhiyun struct kbase_jit_debugfs_data *data;
3572*4882a593Smuzhiyun
3573*4882a593Smuzhiyun data = kzalloc(sizeof(*data), GFP_KERNEL);
3574*4882a593Smuzhiyun if (!data)
3575*4882a593Smuzhiyun return -ENOMEM;
3576*4882a593Smuzhiyun
3577*4882a593Smuzhiyun data->func = func;
3578*4882a593Smuzhiyun mutex_init(&data->lock);
3579*4882a593Smuzhiyun data->kctx = (struct kbase_context *) inode->i_private;
3580*4882a593Smuzhiyun
3581*4882a593Smuzhiyun file->private_data = data;
3582*4882a593Smuzhiyun
3583*4882a593Smuzhiyun return nonseekable_open(inode, file);
3584*4882a593Smuzhiyun }
3585*4882a593Smuzhiyun
3586*4882a593Smuzhiyun static ssize_t kbase_jit_debugfs_common_read(struct file *file,
3587*4882a593Smuzhiyun char __user *buf, size_t len, loff_t *ppos)
3588*4882a593Smuzhiyun {
3589*4882a593Smuzhiyun struct kbase_jit_debugfs_data *data;
3590*4882a593Smuzhiyun size_t size;
3591*4882a593Smuzhiyun int ret;
3592*4882a593Smuzhiyun
3593*4882a593Smuzhiyun data = (struct kbase_jit_debugfs_data *) file->private_data;
3594*4882a593Smuzhiyun mutex_lock(&data->lock);
3595*4882a593Smuzhiyun
3596*4882a593Smuzhiyun if (*ppos) {
3597*4882a593Smuzhiyun size = strnlen(data->buffer, sizeof(data->buffer));
3598*4882a593Smuzhiyun } else {
3599*4882a593Smuzhiyun if (!data->func) {
3600*4882a593Smuzhiyun ret = -EACCES;
3601*4882a593Smuzhiyun goto out_unlock;
3602*4882a593Smuzhiyun }
3603*4882a593Smuzhiyun
3604*4882a593Smuzhiyun if (data->func(data)) {
3605*4882a593Smuzhiyun ret = -EACCES;
3606*4882a593Smuzhiyun goto out_unlock;
3607*4882a593Smuzhiyun }
3608*4882a593Smuzhiyun
3609*4882a593Smuzhiyun size = scnprintf(data->buffer, sizeof(data->buffer),
3610*4882a593Smuzhiyun "%llu,%llu,%llu\n", data->active_value,
3611*4882a593Smuzhiyun data->pool_value, data->destroy_value);
3612*4882a593Smuzhiyun }
3613*4882a593Smuzhiyun
3614*4882a593Smuzhiyun ret = simple_read_from_buffer(buf, len, ppos, data->buffer, size);
3615*4882a593Smuzhiyun
3616*4882a593Smuzhiyun out_unlock:
3617*4882a593Smuzhiyun mutex_unlock(&data->lock);
3618*4882a593Smuzhiyun return ret;
3619*4882a593Smuzhiyun }
3620*4882a593Smuzhiyun
3621*4882a593Smuzhiyun static int kbase_jit_debugfs_common_release(struct inode *inode,
3622*4882a593Smuzhiyun struct file *file)
3623*4882a593Smuzhiyun {
3624*4882a593Smuzhiyun kfree(file->private_data);
3625*4882a593Smuzhiyun return 0;
3626*4882a593Smuzhiyun }
3627*4882a593Smuzhiyun
3628*4882a593Smuzhiyun #define KBASE_JIT_DEBUGFS_DECLARE(__fops, __func) \
3629*4882a593Smuzhiyun static int __fops ## _open(struct inode *inode, struct file *file) \
3630*4882a593Smuzhiyun { \
3631*4882a593Smuzhiyun return kbase_jit_debugfs_common_open(inode, file, __func); \
3632*4882a593Smuzhiyun } \
3633*4882a593Smuzhiyun static const struct file_operations __fops = { \
3634*4882a593Smuzhiyun .owner = THIS_MODULE, \
3635*4882a593Smuzhiyun .open = __fops ## _open, \
3636*4882a593Smuzhiyun .release = kbase_jit_debugfs_common_release, \
3637*4882a593Smuzhiyun .read = kbase_jit_debugfs_common_read, \
3638*4882a593Smuzhiyun .write = NULL, \
3639*4882a593Smuzhiyun .llseek = generic_file_llseek, \
3640*4882a593Smuzhiyun }
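
/*
 * For reference, KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
 * kbase_jit_debugfs_count_get) below expands to approximately:
 *
 *	static int kbase_jit_debugfs_count_fops_open(struct inode *inode,
 *			struct file *file)
 *	{
 *		return kbase_jit_debugfs_common_open(inode, file,
 *				kbase_jit_debugfs_count_get);
 *	}
 *	static const struct file_operations kbase_jit_debugfs_count_fops = {
 *		.owner = THIS_MODULE,
 *		.open = kbase_jit_debugfs_count_fops_open,
 *		.release = kbase_jit_debugfs_common_release,
 *		.read = kbase_jit_debugfs_common_read,
 *		.write = NULL,
 *		.llseek = generic_file_llseek,
 *	};
 *
 * so each declaration only supplies the per-file "get" callback while the
 * open/read/release implementations are shared.
 */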
3641*4882a593Smuzhiyun
3642*4882a593Smuzhiyun static int kbase_jit_debugfs_count_get(struct kbase_jit_debugfs_data *data)
3643*4882a593Smuzhiyun {
3644*4882a593Smuzhiyun struct kbase_context *kctx = data->kctx;
3645*4882a593Smuzhiyun struct list_head *tmp;
3646*4882a593Smuzhiyun
3647*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3648*4882a593Smuzhiyun list_for_each(tmp, &kctx->jit_active_head) {
3649*4882a593Smuzhiyun data->active_value++;
3650*4882a593Smuzhiyun }
3651*4882a593Smuzhiyun
3652*4882a593Smuzhiyun list_for_each(tmp, &kctx->jit_pool_head) {
3653*4882a593Smuzhiyun data->pool_value++;
3654*4882a593Smuzhiyun }
3655*4882a593Smuzhiyun
3656*4882a593Smuzhiyun list_for_each(tmp, &kctx->jit_destroy_head) {
3657*4882a593Smuzhiyun data->destroy_value++;
3658*4882a593Smuzhiyun }
3659*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3660*4882a593Smuzhiyun
3661*4882a593Smuzhiyun return 0;
3662*4882a593Smuzhiyun }
3663*4882a593Smuzhiyun KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_count_fops,
3664*4882a593Smuzhiyun kbase_jit_debugfs_count_get);
3665*4882a593Smuzhiyun
3666*4882a593Smuzhiyun static int kbase_jit_debugfs_vm_get(struct kbase_jit_debugfs_data *data)
3667*4882a593Smuzhiyun {
3668*4882a593Smuzhiyun struct kbase_context *kctx = data->kctx;
3669*4882a593Smuzhiyun struct kbase_va_region *reg;
3670*4882a593Smuzhiyun
3671*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3672*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
3673*4882a593Smuzhiyun data->active_value += reg->nr_pages;
3674*4882a593Smuzhiyun }
3675*4882a593Smuzhiyun
3676*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
3677*4882a593Smuzhiyun data->pool_value += reg->nr_pages;
3678*4882a593Smuzhiyun }
3679*4882a593Smuzhiyun
3680*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
3681*4882a593Smuzhiyun data->destroy_value += reg->nr_pages;
3682*4882a593Smuzhiyun }
3683*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3684*4882a593Smuzhiyun
3685*4882a593Smuzhiyun return 0;
3686*4882a593Smuzhiyun }
3687*4882a593Smuzhiyun KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_vm_fops,
3688*4882a593Smuzhiyun kbase_jit_debugfs_vm_get);
3689*4882a593Smuzhiyun
3690*4882a593Smuzhiyun static int kbase_jit_debugfs_phys_get(struct kbase_jit_debugfs_data *data)
3691*4882a593Smuzhiyun {
3692*4882a593Smuzhiyun struct kbase_context *kctx = data->kctx;
3693*4882a593Smuzhiyun struct kbase_va_region *reg;
3694*4882a593Smuzhiyun
3695*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3696*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
3697*4882a593Smuzhiyun data->active_value += reg->gpu_alloc->nents;
3698*4882a593Smuzhiyun }
3699*4882a593Smuzhiyun
3700*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_pool_head, jit_node) {
3701*4882a593Smuzhiyun data->pool_value += reg->gpu_alloc->nents;
3702*4882a593Smuzhiyun }
3703*4882a593Smuzhiyun
3704*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_destroy_head, jit_node) {
3705*4882a593Smuzhiyun data->destroy_value += reg->gpu_alloc->nents;
3706*4882a593Smuzhiyun }
3707*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3708*4882a593Smuzhiyun
3709*4882a593Smuzhiyun return 0;
3710*4882a593Smuzhiyun }
3711*4882a593Smuzhiyun KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_phys_fops,
3712*4882a593Smuzhiyun kbase_jit_debugfs_phys_get);
3713*4882a593Smuzhiyun
3714*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
3715*4882a593Smuzhiyun static int kbase_jit_debugfs_used_get(struct kbase_jit_debugfs_data *data)
3716*4882a593Smuzhiyun {
3717*4882a593Smuzhiyun struct kbase_context *kctx = data->kctx;
3718*4882a593Smuzhiyun struct kbase_va_region *reg;
3719*4882a593Smuzhiyun
3720*4882a593Smuzhiyun #if !MALI_USE_CSF
3721*4882a593Smuzhiyun mutex_lock(&kctx->jctx.lock);
3722*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3723*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3724*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
3725*4882a593Smuzhiyun data->active_value += reg->used_pages;
3726*4882a593Smuzhiyun }
3727*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3728*4882a593Smuzhiyun #if !MALI_USE_CSF
3729*4882a593Smuzhiyun mutex_unlock(&kctx->jctx.lock);
3730*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3731*4882a593Smuzhiyun
3732*4882a593Smuzhiyun return 0;
3733*4882a593Smuzhiyun }
3734*4882a593Smuzhiyun
3735*4882a593Smuzhiyun KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_used_fops,
3736*4882a593Smuzhiyun kbase_jit_debugfs_used_get);
3737*4882a593Smuzhiyun
3738*4882a593Smuzhiyun static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
3739*4882a593Smuzhiyun struct kbase_va_region *reg, size_t pages_needed,
3740*4882a593Smuzhiyun size_t *freed, bool shrink);
3741*4882a593Smuzhiyun
3742*4882a593Smuzhiyun static int kbase_jit_debugfs_trim_get(struct kbase_jit_debugfs_data *data)
3743*4882a593Smuzhiyun {
3744*4882a593Smuzhiyun struct kbase_context *kctx = data->kctx;
3745*4882a593Smuzhiyun struct kbase_va_region *reg;
3746*4882a593Smuzhiyun
3747*4882a593Smuzhiyun #if !MALI_USE_CSF
3748*4882a593Smuzhiyun mutex_lock(&kctx->jctx.lock);
3749*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3750*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
3751*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3752*4882a593Smuzhiyun list_for_each_entry(reg, &kctx->jit_active_head, jit_node) {
3753*4882a593Smuzhiyun int err;
3754*4882a593Smuzhiyun size_t freed = 0u;
3755*4882a593Smuzhiyun
3756*4882a593Smuzhiyun err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
3757*4882a593Smuzhiyun SIZE_MAX, &freed, false);
3758*4882a593Smuzhiyun
3759*4882a593Smuzhiyun if (err) {
3760*4882a593Smuzhiyun /* Failed to calculate, try the next region */
3761*4882a593Smuzhiyun continue;
3762*4882a593Smuzhiyun }
3763*4882a593Smuzhiyun
3764*4882a593Smuzhiyun data->active_value += freed;
3765*4882a593Smuzhiyun }
3766*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3767*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
3768*4882a593Smuzhiyun #if !MALI_USE_CSF
3769*4882a593Smuzhiyun mutex_unlock(&kctx->jctx.lock);
3770*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3771*4882a593Smuzhiyun
3772*4882a593Smuzhiyun return 0;
3773*4882a593Smuzhiyun }
3774*4882a593Smuzhiyun
3775*4882a593Smuzhiyun KBASE_JIT_DEBUGFS_DECLARE(kbase_jit_debugfs_trim_fops,
3776*4882a593Smuzhiyun kbase_jit_debugfs_trim_get);
3777*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
3778*4882a593Smuzhiyun
3779*4882a593Smuzhiyun void kbase_jit_debugfs_init(struct kbase_context *kctx)
3780*4882a593Smuzhiyun {
3781*4882a593Smuzhiyun /* Prevent unprivileged use of the debug file system
3782*4882a593Smuzhiyun * on old kernel versions.
3783*4882a593Smuzhiyun */
3784*4882a593Smuzhiyun const mode_t mode = 0444;
3785*4882a593Smuzhiyun
3786*4882a593Smuzhiyun /* Caller already ensures this, but we keep the pattern for
3787*4882a593Smuzhiyun * maintenance safety.
3788*4882a593Smuzhiyun */
3789*4882a593Smuzhiyun if (WARN_ON(!kctx) ||
3790*4882a593Smuzhiyun WARN_ON(IS_ERR_OR_NULL(kctx->kctx_dentry)))
3791*4882a593Smuzhiyun return;
3792*4882a593Smuzhiyun
3793*4882a593Smuzhiyun
3794*4882a593Smuzhiyun
3795*4882a593Smuzhiyun /* Debugfs entry for getting the number of JIT allocations. */
3796*4882a593Smuzhiyun debugfs_create_file("mem_jit_count", mode, kctx->kctx_dentry,
3797*4882a593Smuzhiyun kctx, &kbase_jit_debugfs_count_fops);
3798*4882a593Smuzhiyun
3799*4882a593Smuzhiyun /*
3800*4882a593Smuzhiyun * Debugfs entry for getting the total number of virtual pages
3801*4882a593Smuzhiyun * used by JIT allocations.
3802*4882a593Smuzhiyun */
3803*4882a593Smuzhiyun debugfs_create_file("mem_jit_vm", mode, kctx->kctx_dentry,
3804*4882a593Smuzhiyun kctx, &kbase_jit_debugfs_vm_fops);
3805*4882a593Smuzhiyun
3806*4882a593Smuzhiyun /*
3807*4882a593Smuzhiyun * Debugfs entry for getting the number of physical pages used
3808*4882a593Smuzhiyun * by JIT allocations.
3809*4882a593Smuzhiyun */
3810*4882a593Smuzhiyun debugfs_create_file("mem_jit_phys", mode, kctx->kctx_dentry,
3811*4882a593Smuzhiyun kctx, &kbase_jit_debugfs_phys_fops);
3812*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
3813*4882a593Smuzhiyun /*
3814*4882a593Smuzhiyun * Debugfs entry for getting the number of pages used
3815*4882a593Smuzhiyun * by JIT allocations for estimating the physical pressure
3816*4882a593Smuzhiyun * limit.
3817*4882a593Smuzhiyun */
3818*4882a593Smuzhiyun debugfs_create_file("mem_jit_used", mode, kctx->kctx_dentry,
3819*4882a593Smuzhiyun kctx, &kbase_jit_debugfs_used_fops);
3820*4882a593Smuzhiyun
3821*4882a593Smuzhiyun /*
3822*4882a593Smuzhiyun * Debugfs entry for getting the number of pages that could
3823*4882a593Smuzhiyun * be trimmed to free space for more JIT allocations.
3824*4882a593Smuzhiyun */
3825*4882a593Smuzhiyun debugfs_create_file("mem_jit_trim", mode, kctx->kctx_dentry,
3826*4882a593Smuzhiyun kctx, &kbase_jit_debugfs_trim_fops);
3827*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
3828*4882a593Smuzhiyun }
3829*4882a593Smuzhiyun #endif /* CONFIG_DEBUG_FS */
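
/*
 * Each of the debugfs files created above returns a single line formatted as
 * "<active>,<pool>,<destroy>\n". A user-space reader could parse it with a
 * sketch along these lines (the exact path depends on where kctx_dentry was
 * created and is shown here purely as an illustration):
 *
 *	unsigned long long active, pool, destroy;
 *	char buf[64] = { 0 };
 *	int fd = open("/sys/kernel/debug/mali0/ctx/<pid>_<id>/mem_jit_count",
 *		      O_RDONLY);
 *
 *	if (fd >= 0) {
 *		if (read(fd, buf, sizeof(buf) - 1) > 0)
 *			sscanf(buf, "%llu,%llu,%llu",
 *			       &active, &pool, &destroy);
 *		close(fd);
 *	}
 */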
3830*4882a593Smuzhiyun
3831*4882a593Smuzhiyun /**
3832*4882a593Smuzhiyun * kbase_jit_destroy_worker - Deferred worker which frees JIT allocations
3833*4882a593Smuzhiyun * @work: Work item
3834*4882a593Smuzhiyun *
3835*4882a593Smuzhiyun * This function does the work of freeing JIT allocations whose physical
3836*4882a593Smuzhiyun * backing has been released.
3837*4882a593Smuzhiyun */
3838*4882a593Smuzhiyun static void kbase_jit_destroy_worker(struct work_struct *work)
3839*4882a593Smuzhiyun {
3840*4882a593Smuzhiyun struct kbase_context *kctx;
3841*4882a593Smuzhiyun struct kbase_va_region *reg;
3842*4882a593Smuzhiyun
3843*4882a593Smuzhiyun kctx = container_of(work, struct kbase_context, jit_work);
3844*4882a593Smuzhiyun do {
3845*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3846*4882a593Smuzhiyun if (list_empty(&kctx->jit_destroy_head)) {
3847*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3848*4882a593Smuzhiyun break;
3849*4882a593Smuzhiyun }
3850*4882a593Smuzhiyun
3851*4882a593Smuzhiyun reg = list_first_entry(&kctx->jit_destroy_head,
3852*4882a593Smuzhiyun struct kbase_va_region, jit_node);
3853*4882a593Smuzhiyun
3854*4882a593Smuzhiyun list_del(&reg->jit_node);
3855*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3856*4882a593Smuzhiyun
3857*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
3858*4882a593Smuzhiyun
3859*4882a593Smuzhiyun /*
3860*4882a593Smuzhiyun * Incrementing the refcount is prevented on JIT regions.
3861*4882a593Smuzhiyun * If/when this ever changes we would need to compensate
3862*4882a593Smuzhiyun * by implementing "free on putting the last reference",
3863*4882a593Smuzhiyun * but only for JIT regions.
3864*4882a593Smuzhiyun */
3865*4882a593Smuzhiyun WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
3866*4882a593Smuzhiyun kbase_va_region_no_user_free_dec(reg);
3867*4882a593Smuzhiyun kbase_mem_free_region(kctx, reg);
3868*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
3869*4882a593Smuzhiyun } while (1);
3870*4882a593Smuzhiyun }
3871*4882a593Smuzhiyun
3872*4882a593Smuzhiyun int kbase_jit_init(struct kbase_context *kctx)
3873*4882a593Smuzhiyun {
3874*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
3875*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->jit_active_head);
3876*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->jit_pool_head);
3877*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->jit_destroy_head);
3878*4882a593Smuzhiyun INIT_WORK(&kctx->jit_work, kbase_jit_destroy_worker);
3879*4882a593Smuzhiyun
3880*4882a593Smuzhiyun #if MALI_USE_CSF
3881*4882a593Smuzhiyun mutex_init(&kctx->csf.kcpu_queues.jit_lock);
3882*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_cmds_head);
3883*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->csf.kcpu_queues.jit_blocked_queues);
3884*4882a593Smuzhiyun #else /* !MALI_USE_CSF */
3885*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->jctx.jit_atoms_head);
3886*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->jctx.jit_pending_alloc);
3887*4882a593Smuzhiyun #endif /* MALI_USE_CSF */
3888*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
3889*4882a593Smuzhiyun
3890*4882a593Smuzhiyun kctx->jit_max_allocations = 0;
3891*4882a593Smuzhiyun kctx->jit_current_allocations = 0;
3892*4882a593Smuzhiyun kctx->trim_level = 0;
3893*4882a593Smuzhiyun
3894*4882a593Smuzhiyun return 0;
3895*4882a593Smuzhiyun }
3896*4882a593Smuzhiyun
3897*4882a593Smuzhiyun /* Check whether an allocation from the JIT pool is of the same size as the
3898*4882a593Smuzhiyun * new JIT allocation and also, if BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP is set,
3899*4882a593Smuzhiyun * meets the alignment requirements.
3900*4882a593Smuzhiyun */
3901*4882a593Smuzhiyun static bool meet_size_and_tiler_align_top_requirements(
3902*4882a593Smuzhiyun const struct kbase_va_region *walker,
3903*4882a593Smuzhiyun const struct base_jit_alloc_info *info)
3904*4882a593Smuzhiyun {
3905*4882a593Smuzhiyun bool meet_reqs = true;
3906*4882a593Smuzhiyun
3907*4882a593Smuzhiyun if (walker->nr_pages != info->va_pages)
3908*4882a593Smuzhiyun meet_reqs = false;
3909*4882a593Smuzhiyun
3910*4882a593Smuzhiyun #if !MALI_USE_CSF
3911*4882a593Smuzhiyun if (meet_reqs && (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)) {
3912*4882a593Smuzhiyun size_t align = info->extension;
3913*4882a593Smuzhiyun size_t align_mask = align - 1;
3914*4882a593Smuzhiyun
3915*4882a593Smuzhiyun if ((walker->start_pfn + info->commit_pages) & align_mask)
3916*4882a593Smuzhiyun meet_reqs = false;
3917*4882a593Smuzhiyun }
3918*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3919*4882a593Smuzhiyun
3920*4882a593Smuzhiyun return meet_reqs;
3921*4882a593Smuzhiyun }
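
/*
 * Sketch (not driver code) of the BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP test
 * above with illustrative numbers: for an extension of 512 pages the mask is
 * 0x1ff, so a pooled region is only considered a match when
 * (start_pfn + commit_pages) lands on a 512-page boundary, i.e. the top of
 * the initial commit is aligned to 'extension'.
 */
static inline bool example_tiler_align_top_sketch(u64 start_pfn, u64 commit_pages)
{
	const size_t align = 512;		/* illustrative info->extension */
	const size_t align_mask = align - 1;	/* 0x1ff */

	return ((start_pfn + commit_pages) & align_mask) == 0;
}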
3922*4882a593Smuzhiyun
3923*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
3924*4882a593Smuzhiyun /* This function guarantees that *@freed will not exceed @pages_needed.
3925*4882a593Smuzhiyun */
3926*4882a593Smuzhiyun static int kbase_mem_jit_trim_pages_from_region(struct kbase_context *kctx,
3927*4882a593Smuzhiyun struct kbase_va_region *reg, size_t pages_needed,
3928*4882a593Smuzhiyun size_t *freed, bool shrink)
3929*4882a593Smuzhiyun {
3930*4882a593Smuzhiyun int err = 0;
3931*4882a593Smuzhiyun size_t available_pages = 0u;
3932*4882a593Smuzhiyun const size_t old_pages = kbase_reg_current_backed_size(reg);
3933*4882a593Smuzhiyun size_t new_pages = old_pages;
3934*4882a593Smuzhiyun size_t to_free = 0u;
3935*4882a593Smuzhiyun size_t max_allowed_pages = old_pages;
3936*4882a593Smuzhiyun
3937*4882a593Smuzhiyun #if !MALI_USE_CSF
3938*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
3939*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
3940*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
3941*4882a593Smuzhiyun
3942*4882a593Smuzhiyun /* Is this a JIT allocation that has been reported on? */
3943*4882a593Smuzhiyun if (reg->used_pages == reg->nr_pages)
3944*4882a593Smuzhiyun goto out;
3945*4882a593Smuzhiyun
3946*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)) {
3947*4882a593Smuzhiyun /* For address based memory usage calculation, the GPU
3948*4882a593Smuzhiyun * allocates objects of up to size 's', but aligns every object
3949*4882a593Smuzhiyun * to alignment 'a', with a < s.
3950*4882a593Smuzhiyun *
3951*4882a593Smuzhiyun * It also doesn't have to write to all bytes in an object of
3952*4882a593Smuzhiyun * size 's'.
3953*4882a593Smuzhiyun *
3954*4882a593Smuzhiyun * Hence, we can observe the GPU's address for the end of used
3955*4882a593Smuzhiyun * memory being up to (s - a) bytes into the first unallocated
3956*4882a593Smuzhiyun * page.
3957*4882a593Smuzhiyun *
3958*4882a593Smuzhiyun * We allow for this and only warn when it exceeds this bound
3959*4882a593Smuzhiyun * (rounded up to page sized units). Note, this is allowed to
3960*4882a593Smuzhiyun * exceed reg->nr_pages.
3961*4882a593Smuzhiyun */
3962*4882a593Smuzhiyun max_allowed_pages += PFN_UP(
3963*4882a593Smuzhiyun KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES -
3964*4882a593Smuzhiyun KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES);
3965*4882a593Smuzhiyun } else if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
3966*4882a593Smuzhiyun /* The GPU could report being ready to write to the next
3967*4882a593Smuzhiyun * 'extension' sized chunk, but didn't actually write to it, so we
3968*4882a593Smuzhiyun * can report up to 'extension' size pages more than the backed
3969*4882a593Smuzhiyun * size.
3970*4882a593Smuzhiyun *
3971*4882a593Smuzhiyun * Note, this is allowed to exceed reg->nr_pages.
3972*4882a593Smuzhiyun */
3973*4882a593Smuzhiyun max_allowed_pages += reg->extension;
3974*4882a593Smuzhiyun
3975*4882a593Smuzhiyun /* Also note that in these GPUs, the GPU may make a large (>1
3976*4882a593Smuzhiyun * page) initial allocation but not actually write out to all
3977*4882a593Smuzhiyun * of it. Hence it might report that a much higher amount of
3978*4882a593Smuzhiyun * memory was used than actually was written to. This does not
3979*4882a593Smuzhiyun * result in a real warning because on growing this memory we
3980*4882a593Smuzhiyun * round up the size of the allocation up to an 'extension' sized
3981*4882a593Smuzhiyun * chunk, hence automatically bringing the backed size up to
3982*4882a593Smuzhiyun * the reported size.
3983*4882a593Smuzhiyun */
3984*4882a593Smuzhiyun }
3985*4882a593Smuzhiyun
3986*4882a593Smuzhiyun if (old_pages < reg->used_pages) {
3987*4882a593Smuzhiyun /* Prevent overflow on available_pages, but only report the
3988*4882a593Smuzhiyun * problem if it's in a scenario where used_pages should have
3989*4882a593Smuzhiyun * been consistent with the backed size
3990*4882a593Smuzhiyun *
3991*4882a593Smuzhiyun * Note: In case of a size-based report, this legitimately
3992*4882a593Smuzhiyun * happens in common use-cases: we allow for up to this size of
3993*4882a593Smuzhiyun * memory being used, but depending on the content it doesn't
3994*4882a593Smuzhiyun * have to use all of it.
3995*4882a593Smuzhiyun *
3996*4882a593Smuzhiyun * Hence, we're much more quiet about that in the size-based
3997*4882a593Smuzhiyun * report case - it's not indicating a real problem, it's just
3998*4882a593Smuzhiyun * for information
3999*4882a593Smuzhiyun */
4000*4882a593Smuzhiyun if (max_allowed_pages < reg->used_pages) {
4001*4882a593Smuzhiyun if (!(reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE))
4002*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev,
4003*4882a593Smuzhiyun "%s: current backed pages %zu < reported used pages %zu (allowed to be up to %zu) on JIT 0x%llx vapages %zu\n",
4004*4882a593Smuzhiyun __func__,
4005*4882a593Smuzhiyun old_pages, reg->used_pages,
4006*4882a593Smuzhiyun max_allowed_pages,
4007*4882a593Smuzhiyun reg->start_pfn << PAGE_SHIFT,
4008*4882a593Smuzhiyun reg->nr_pages);
4009*4882a593Smuzhiyun else
4010*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4011*4882a593Smuzhiyun "%s: no need to trim, current backed pages %zu < reported used pages %zu on size-report for JIT 0x%llx vapages %zu\n",
4012*4882a593Smuzhiyun __func__,
4013*4882a593Smuzhiyun old_pages, reg->used_pages,
4014*4882a593Smuzhiyun reg->start_pfn << PAGE_SHIFT,
4015*4882a593Smuzhiyun reg->nr_pages);
4016*4882a593Smuzhiyun }
4017*4882a593Smuzhiyun /* In any case, no error condition to report here, caller can
4018*4882a593Smuzhiyun * try other regions
4019*4882a593Smuzhiyun */
4020*4882a593Smuzhiyun
4021*4882a593Smuzhiyun goto out;
4022*4882a593Smuzhiyun }
4023*4882a593Smuzhiyun available_pages = old_pages - reg->used_pages;
4024*4882a593Smuzhiyun to_free = min(available_pages, pages_needed);
4025*4882a593Smuzhiyun
4026*4882a593Smuzhiyun if (shrink) {
4027*4882a593Smuzhiyun new_pages -= to_free;
4028*4882a593Smuzhiyun
4029*4882a593Smuzhiyun err = kbase_mem_shrink(kctx, reg, new_pages);
4030*4882a593Smuzhiyun }
4031*4882a593Smuzhiyun out:
4032*4882a593Smuzhiyun trace_mali_jit_trim_from_region(reg, to_free, old_pages,
4033*4882a593Smuzhiyun available_pages, new_pages);
4034*4882a593Smuzhiyun *freed = to_free;
4035*4882a593Smuzhiyun return err;
4036*4882a593Smuzhiyun }
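
/*
 * Worked example of the address-based slack granted above: the GPU's
 * end-of-used-memory pointer may legitimately sit up to
 * (KBASE_GPU_ALLOCATED_OBJECT_MAX_BYTES - KBASE_GPU_ALLOCATED_OBJECT_ALIGN_BYTES)
 * bytes into the first unbacked page, so max_allowed_pages is padded by
 * PFN_UP() of that difference. With the 128-byte alignment defined at the top
 * of this file and an illustrative 4 KiB maximum object size, that padding
 * would be PFN_UP(4096 - 128) == 1 page; the 4 KiB figure is an assumption
 * for the example, not the driver's actual constant.
 */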
4037*4882a593Smuzhiyun
4038*4882a593Smuzhiyun
4039*4882a593Smuzhiyun /**
4040*4882a593Smuzhiyun * kbase_mem_jit_trim_pages - Trim JIT regions until sufficient pages have been
4041*4882a593Smuzhiyun * freed
4042*4882a593Smuzhiyun * @kctx: Pointer to the kbase context whose active JIT allocations will be
4043*4882a593Smuzhiyun * checked.
4044*4882a593Smuzhiyun * @pages_needed: The maximum number of pages to trim.
4045*4882a593Smuzhiyun *
4046*4882a593Smuzhiyun * This function checks all active JIT allocations in @kctx for unused pages
4047*4882a593Smuzhiyun * at the end, trims the backed memory regions of those allocations down to
4048*4882a593Smuzhiyun * the used portion, and frees the unused pages into the page pool.
4049*4882a593Smuzhiyun *
4050*4882a593Smuzhiyun * Specifying @pages_needed allows the scan to stop early, once enough
4051*4882a593Smuzhiyun * physical memory has been freed to bring the total JIT physical page usage
4052*4882a593Smuzhiyun * down sufficiently (e.g. below the pressure limit).
4053*4882a593Smuzhiyun *
4054*4882a593Smuzhiyun * Return: Total number of successfully freed pages
4055*4882a593Smuzhiyun */
4056*4882a593Smuzhiyun static size_t kbase_mem_jit_trim_pages(struct kbase_context *kctx,
4057*4882a593Smuzhiyun size_t pages_needed)
4058*4882a593Smuzhiyun {
4059*4882a593Smuzhiyun struct kbase_va_region *reg, *tmp;
4060*4882a593Smuzhiyun size_t total_freed = 0;
4061*4882a593Smuzhiyun
4062*4882a593Smuzhiyun #if !MALI_USE_CSF
4063*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4064*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4065*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
4066*4882a593Smuzhiyun lockdep_assert_held(&kctx->jit_evict_lock);
4067*4882a593Smuzhiyun
4068*4882a593Smuzhiyun list_for_each_entry_safe(reg, tmp, &kctx->jit_active_head, jit_node) {
4069*4882a593Smuzhiyun int err;
4070*4882a593Smuzhiyun size_t freed = 0u;
4071*4882a593Smuzhiyun
4072*4882a593Smuzhiyun err = kbase_mem_jit_trim_pages_from_region(kctx, reg,
4073*4882a593Smuzhiyun pages_needed, &freed, true);
4074*4882a593Smuzhiyun
4075*4882a593Smuzhiyun if (err) {
4076*4882a593Smuzhiyun /* Failed to trim, try the next region */
4077*4882a593Smuzhiyun continue;
4078*4882a593Smuzhiyun }
4079*4882a593Smuzhiyun
4080*4882a593Smuzhiyun total_freed += freed;
4081*4882a593Smuzhiyun WARN_ON(freed > pages_needed);
4082*4882a593Smuzhiyun pages_needed -= freed;
4083*4882a593Smuzhiyun if (!pages_needed)
4084*4882a593Smuzhiyun break;
4085*4882a593Smuzhiyun }
4086*4882a593Smuzhiyun
4087*4882a593Smuzhiyun trace_mali_jit_trim(total_freed);
4088*4882a593Smuzhiyun
4089*4882a593Smuzhiyun return total_freed;
4090*4882a593Smuzhiyun }
4091*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4092*4882a593Smuzhiyun
4093*4882a593Smuzhiyun static int kbase_jit_grow(struct kbase_context *kctx,
4094*4882a593Smuzhiyun const struct base_jit_alloc_info *info,
4095*4882a593Smuzhiyun struct kbase_va_region *reg,
4096*4882a593Smuzhiyun struct kbase_sub_alloc **prealloc_sas,
4097*4882a593Smuzhiyun enum kbase_caller_mmu_sync_info mmu_sync_info)
4098*4882a593Smuzhiyun {
4099*4882a593Smuzhiyun size_t delta;
4100*4882a593Smuzhiyun size_t pages_required;
4101*4882a593Smuzhiyun size_t old_size;
4102*4882a593Smuzhiyun struct kbase_mem_pool *pool;
4103*4882a593Smuzhiyun int ret = -ENOMEM;
4104*4882a593Smuzhiyun struct tagged_addr *gpu_pages;
4105*4882a593Smuzhiyun
4106*4882a593Smuzhiyun if (info->commit_pages > reg->nr_pages) {
4107*4882a593Smuzhiyun /* Attempted to grow larger than maximum size */
4108*4882a593Smuzhiyun return -EINVAL;
4109*4882a593Smuzhiyun }
4110*4882a593Smuzhiyun
4111*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
4112*4882a593Smuzhiyun
4113*4882a593Smuzhiyun /* Make the physical backing no longer reclaimable */
4114*4882a593Smuzhiyun if (!kbase_mem_evictable_unmake(reg->gpu_alloc))
4115*4882a593Smuzhiyun goto update_failed;
4116*4882a593Smuzhiyun
4117*4882a593Smuzhiyun if (reg->gpu_alloc->nents >= info->commit_pages)
4118*4882a593Smuzhiyun goto done;
4119*4882a593Smuzhiyun
4120*4882a593Smuzhiyun /* Grow the backing */
4121*4882a593Smuzhiyun old_size = reg->gpu_alloc->nents;
4122*4882a593Smuzhiyun
4123*4882a593Smuzhiyun /* Allocate some more pages */
4124*4882a593Smuzhiyun delta = info->commit_pages - reg->gpu_alloc->nents;
4125*4882a593Smuzhiyun pages_required = delta;
4126*4882a593Smuzhiyun
4127*4882a593Smuzhiyun if (kctx->kbdev->pagesize_2mb && pages_required >= (SZ_2M / SZ_4K)) {
4128*4882a593Smuzhiyun pool = &kctx->mem_pools.large[kctx->jit_group_id];
4129*4882a593Smuzhiyun /* Round up to number of 2 MB pages required */
4130*4882a593Smuzhiyun pages_required += ((SZ_2M / SZ_4K) - 1);
4131*4882a593Smuzhiyun pages_required /= (SZ_2M / SZ_4K);
4132*4882a593Smuzhiyun } else {
4133*4882a593Smuzhiyun pool = &kctx->mem_pools.small[kctx->jit_group_id];
4134*4882a593Smuzhiyun }
4135*4882a593Smuzhiyun
4136*4882a593Smuzhiyun if (reg->cpu_alloc != reg->gpu_alloc)
4137*4882a593Smuzhiyun pages_required *= 2;
4138*4882a593Smuzhiyun
4139*4882a593Smuzhiyun spin_lock(&kctx->mem_partials_lock);
4140*4882a593Smuzhiyun kbase_mem_pool_lock(pool);
4141*4882a593Smuzhiyun
4142*4882a593Smuzhiyun /* As we cannot allocate memory from the kernel with the vm_lock held,
4143*4882a593Smuzhiyun * grow the pool to the required size with the lock dropped. We hold the
4144*4882a593Smuzhiyun * pool lock to prevent another thread from allocating from the pool
4145*4882a593Smuzhiyun * between the grow and allocation.
4146*4882a593Smuzhiyun */
4147*4882a593Smuzhiyun while (kbase_mem_pool_size(pool) < pages_required) {
4148*4882a593Smuzhiyun int pool_delta = pages_required - kbase_mem_pool_size(pool);
4149*4882a593Smuzhiyun int ret;
4150*4882a593Smuzhiyun
4151*4882a593Smuzhiyun kbase_mem_pool_unlock(pool);
4152*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
4153*4882a593Smuzhiyun
4154*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4155*4882a593Smuzhiyun ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
4156*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4157*4882a593Smuzhiyun
4158*4882a593Smuzhiyun if (ret)
4159*4882a593Smuzhiyun goto update_failed;
4160*4882a593Smuzhiyun
4161*4882a593Smuzhiyun spin_lock(&kctx->mem_partials_lock);
4162*4882a593Smuzhiyun kbase_mem_pool_lock(pool);
4163*4882a593Smuzhiyun }
4164*4882a593Smuzhiyun
4165*4882a593Smuzhiyun gpu_pages = kbase_alloc_phy_pages_helper_locked(reg->gpu_alloc, pool,
4166*4882a593Smuzhiyun delta, &prealloc_sas[0]);
4167*4882a593Smuzhiyun if (!gpu_pages) {
4168*4882a593Smuzhiyun kbase_mem_pool_unlock(pool);
4169*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
4170*4882a593Smuzhiyun goto update_failed;
4171*4882a593Smuzhiyun }
4172*4882a593Smuzhiyun
4173*4882a593Smuzhiyun if (reg->cpu_alloc != reg->gpu_alloc) {
4174*4882a593Smuzhiyun struct tagged_addr *cpu_pages;
4175*4882a593Smuzhiyun
4176*4882a593Smuzhiyun cpu_pages = kbase_alloc_phy_pages_helper_locked(reg->cpu_alloc,
4177*4882a593Smuzhiyun pool, delta, &prealloc_sas[1]);
4178*4882a593Smuzhiyun if (!cpu_pages) {
4179*4882a593Smuzhiyun kbase_free_phy_pages_helper_locked(reg->gpu_alloc,
4180*4882a593Smuzhiyun pool, gpu_pages, delta);
4181*4882a593Smuzhiyun kbase_mem_pool_unlock(pool);
4182*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
4183*4882a593Smuzhiyun goto update_failed;
4184*4882a593Smuzhiyun }
4185*4882a593Smuzhiyun }
4186*4882a593Smuzhiyun kbase_mem_pool_unlock(pool);
4187*4882a593Smuzhiyun spin_unlock(&kctx->mem_partials_lock);
4188*4882a593Smuzhiyun
4189*4882a593Smuzhiyun ret = kbase_mem_grow_gpu_mapping(kctx, reg, info->commit_pages,
4190*4882a593Smuzhiyun old_size, mmu_sync_info);
4191*4882a593Smuzhiyun /*
4192*4882a593Smuzhiyun * The grow failed so put the allocation back in the
4193*4882a593Smuzhiyun * pool and return failure.
4194*4882a593Smuzhiyun */
4195*4882a593Smuzhiyun if (ret)
4196*4882a593Smuzhiyun goto update_failed;
4197*4882a593Smuzhiyun
4198*4882a593Smuzhiyun done:
4199*4882a593Smuzhiyun ret = 0;
4200*4882a593Smuzhiyun
4201*4882a593Smuzhiyun /* Update attributes of JIT allocation taken from the pool */
4202*4882a593Smuzhiyun reg->initial_commit = info->commit_pages;
4203*4882a593Smuzhiyun reg->extension = info->extension;
4204*4882a593Smuzhiyun
4205*4882a593Smuzhiyun update_failed:
4206*4882a593Smuzhiyun return ret;
4207*4882a593Smuzhiyun }
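
/*
 * Sketch (not driver code) of the pool-sizing arithmetic used in
 * kbase_jit_grow() above: when the device uses 2 MiB pages, the number of
 * 4 KiB pages still needed is rounded up to whole 2 MiB pages, and the
 * requirement doubles when the CPU and GPU allocations are backed separately.
 */
static inline size_t example_jit_grow_pool_pages_sketch(size_t delta_small_pages,
							bool separate_cpu_alloc)
{
	size_t pages_required = delta_small_pages;

	/* Round up to the number of 2 MiB pages required */
	pages_required += (SZ_2M / SZ_4K) - 1;
	pages_required /= (SZ_2M / SZ_4K);

	if (separate_cpu_alloc)
		pages_required *= 2;

	return pages_required;
}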
4208*4882a593Smuzhiyun
4209*4882a593Smuzhiyun static void trace_jit_stats(struct kbase_context *kctx,
4210*4882a593Smuzhiyun u32 bin_id, u32 max_allocations)
4211*4882a593Smuzhiyun {
4212*4882a593Smuzhiyun const u32 alloc_count =
4213*4882a593Smuzhiyun kctx->jit_current_allocations_per_bin[bin_id];
4214*4882a593Smuzhiyun struct kbase_device *kbdev = kctx->kbdev;
4215*4882a593Smuzhiyun
4216*4882a593Smuzhiyun struct kbase_va_region *walker;
4217*4882a593Smuzhiyun u32 va_pages = 0;
4218*4882a593Smuzhiyun u32 ph_pages = 0;
4219*4882a593Smuzhiyun
4220*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4221*4882a593Smuzhiyun list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
4222*4882a593Smuzhiyun if (walker->jit_bin_id != bin_id)
4223*4882a593Smuzhiyun continue;
4224*4882a593Smuzhiyun
4225*4882a593Smuzhiyun va_pages += walker->nr_pages;
4226*4882a593Smuzhiyun ph_pages += walker->gpu_alloc->nents;
4227*4882a593Smuzhiyun }
4228*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4229*4882a593Smuzhiyun
4230*4882a593Smuzhiyun KBASE_TLSTREAM_AUX_JIT_STATS(kbdev, kctx->id, bin_id,
4231*4882a593Smuzhiyun max_allocations, alloc_count, va_pages, ph_pages);
4232*4882a593Smuzhiyun }
4233*4882a593Smuzhiyun
4234*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4235*4882a593Smuzhiyun /**
4236*4882a593Smuzhiyun * get_jit_phys_backing() - calculate the physical backing of all JIT
4237*4882a593Smuzhiyun * allocations
4238*4882a593Smuzhiyun *
4239*4882a593Smuzhiyun * @kctx: Pointer to the kbase context whose active JIT allocations will be
4240*4882a593Smuzhiyun * checked
4241*4882a593Smuzhiyun *
4242*4882a593Smuzhiyun * Return: number of pages that are committed by JIT allocations
4243*4882a593Smuzhiyun */
4244*4882a593Smuzhiyun static size_t get_jit_phys_backing(struct kbase_context *kctx)
4245*4882a593Smuzhiyun {
4246*4882a593Smuzhiyun struct kbase_va_region *walker;
4247*4882a593Smuzhiyun size_t backing = 0;
4248*4882a593Smuzhiyun
4249*4882a593Smuzhiyun lockdep_assert_held(&kctx->jit_evict_lock);
4250*4882a593Smuzhiyun
4251*4882a593Smuzhiyun list_for_each_entry(walker, &kctx->jit_active_head, jit_node) {
4252*4882a593Smuzhiyun backing += kbase_reg_current_backed_size(walker);
4253*4882a593Smuzhiyun }
4254*4882a593Smuzhiyun
4255*4882a593Smuzhiyun return backing;
4256*4882a593Smuzhiyun }
4257*4882a593Smuzhiyun
4258*4882a593Smuzhiyun void kbase_jit_trim_necessary_pages(struct kbase_context *kctx,
4259*4882a593Smuzhiyun size_t needed_pages)
4260*4882a593Smuzhiyun {
4261*4882a593Smuzhiyun size_t jit_backing = 0;
4262*4882a593Smuzhiyun size_t pages_to_trim = 0;
4263*4882a593Smuzhiyun
4264*4882a593Smuzhiyun #if !MALI_USE_CSF
4265*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4266*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4267*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
4268*4882a593Smuzhiyun lockdep_assert_held(&kctx->jit_evict_lock);
4269*4882a593Smuzhiyun
4270*4882a593Smuzhiyun jit_backing = get_jit_phys_backing(kctx);
4271*4882a593Smuzhiyun
4272*4882a593Smuzhiyun /* The backing can already exceed the limit if this is the first
4273*4882a593Smuzhiyun * allocation after an "ignore_pressure_limit" allocation.
4274*4882a593Smuzhiyun */
4275*4882a593Smuzhiyun if (jit_backing > kctx->jit_phys_pages_limit) {
4276*4882a593Smuzhiyun pages_to_trim += (jit_backing - kctx->jit_phys_pages_limit) +
4277*4882a593Smuzhiyun needed_pages;
4278*4882a593Smuzhiyun } else {
4279*4882a593Smuzhiyun size_t backed_diff = kctx->jit_phys_pages_limit - jit_backing;
4280*4882a593Smuzhiyun
4281*4882a593Smuzhiyun if (needed_pages > backed_diff)
4282*4882a593Smuzhiyun pages_to_trim += needed_pages - backed_diff;
4283*4882a593Smuzhiyun }
4284*4882a593Smuzhiyun
4285*4882a593Smuzhiyun if (pages_to_trim) {
4286*4882a593Smuzhiyun size_t trimmed_pages =
4287*4882a593Smuzhiyun kbase_mem_jit_trim_pages(kctx, pages_to_trim);
4288*4882a593Smuzhiyun
4289*4882a593Smuzhiyun /* This should never happen - we already asserted that
4290*4882a593Smuzhiyun * we are not violating JIT pressure limit in earlier
4291*4882a593Smuzhiyun * checks, which means that in-flight JIT allocations
4292*4882a593Smuzhiyun * must have enough unused pages to satisfy the new
4293*4882a593Smuzhiyun * allocation
4294*4882a593Smuzhiyun */
4295*4882a593Smuzhiyun WARN_ON(trimmed_pages < pages_to_trim);
4296*4882a593Smuzhiyun }
4297*4882a593Smuzhiyun }
4298*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
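
/*
 * Worked example (illustrative numbers) of the trim target computed in
 * kbase_jit_trim_necessary_pages() above: with a pressure limit of 1000
 * pages, 900 pages currently backed and 150 new pages needed, the headroom
 * is 100 pages and 50 pages must be trimmed; if the backing already exceeds
 * the limit, the whole excess plus the new requirement is trimmed.
 */
static inline size_t example_jit_trim_target_sketch(size_t limit, size_t backing,
						    size_t needed_pages)
{
	if (backing > limit)
		return (backing - limit) + needed_pages;

	return (needed_pages > (limit - backing)) ?
			needed_pages - (limit - backing) : 0;
}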
4299*4882a593Smuzhiyun
4300*4882a593Smuzhiyun /**
4301*4882a593Smuzhiyun * jit_allow_allocate() - check whether basic conditions are satisfied to allow
4302*4882a593Smuzhiyun * a new JIT allocation
4303*4882a593Smuzhiyun *
4304*4882a593Smuzhiyun * @kctx: Pointer to the kbase context
4305*4882a593Smuzhiyun * @info: Pointer to JIT allocation information for the new allocation
4306*4882a593Smuzhiyun * @ignore_pressure_limit: Flag to indicate whether JIT pressure limit check
4307*4882a593Smuzhiyun * should be ignored
4308*4882a593Smuzhiyun *
4309*4882a593Smuzhiyun * Return: true if allocation can be executed, false otherwise
4310*4882a593Smuzhiyun */
4311*4882a593Smuzhiyun static bool jit_allow_allocate(struct kbase_context *kctx,
4312*4882a593Smuzhiyun const struct base_jit_alloc_info *info,
4313*4882a593Smuzhiyun bool ignore_pressure_limit)
4314*4882a593Smuzhiyun {
4315*4882a593Smuzhiyun #if !MALI_USE_CSF
4316*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4317*4882a593Smuzhiyun #else /* MALI_USE_CSF */
4318*4882a593Smuzhiyun lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4319*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4320*4882a593Smuzhiyun
4321*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4322*4882a593Smuzhiyun if (!ignore_pressure_limit &&
4323*4882a593Smuzhiyun ((kctx->jit_phys_pages_limit <= kctx->jit_current_phys_pressure) ||
4324*4882a593Smuzhiyun (info->va_pages > (kctx->jit_phys_pages_limit - kctx->jit_current_phys_pressure)))) {
4325*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4326*4882a593Smuzhiyun "Max JIT page allocations limit reached: active pages %llu, max pages %llu\n",
4327*4882a593Smuzhiyun kctx->jit_current_phys_pressure + info->va_pages,
4328*4882a593Smuzhiyun kctx->jit_phys_pages_limit);
4329*4882a593Smuzhiyun return false;
4330*4882a593Smuzhiyun }
4331*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4332*4882a593Smuzhiyun
4333*4882a593Smuzhiyun if (kctx->jit_current_allocations >= kctx->jit_max_allocations) {
4334*4882a593Smuzhiyun /* Too many current allocations */
4335*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4336*4882a593Smuzhiyun "Max JIT allocations limit reached: active allocations %d, max allocations %d\n",
4337*4882a593Smuzhiyun kctx->jit_current_allocations,
4338*4882a593Smuzhiyun kctx->jit_max_allocations);
4339*4882a593Smuzhiyun return false;
4340*4882a593Smuzhiyun }
4341*4882a593Smuzhiyun
4342*4882a593Smuzhiyun if (info->max_allocations > 0 &&
4343*4882a593Smuzhiyun kctx->jit_current_allocations_per_bin[info->bin_id] >=
4344*4882a593Smuzhiyun info->max_allocations) {
4345*4882a593Smuzhiyun /* Too many current allocations in this bin */
4346*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4347*4882a593Smuzhiyun "Per bin limit of max JIT allocations reached: bin_id %d, active allocations %d, max allocations %d\n",
4348*4882a593Smuzhiyun info->bin_id,
4349*4882a593Smuzhiyun kctx->jit_current_allocations_per_bin[info->bin_id],
4350*4882a593Smuzhiyun info->max_allocations);
4351*4882a593Smuzhiyun return false;
4352*4882a593Smuzhiyun }
4353*4882a593Smuzhiyun
4354*4882a593Smuzhiyun return true;
4355*4882a593Smuzhiyun }
4356*4882a593Smuzhiyun
4357*4882a593Smuzhiyun static struct kbase_va_region *
4358*4882a593Smuzhiyun find_reasonable_region(const struct base_jit_alloc_info *info,
4359*4882a593Smuzhiyun struct list_head *pool_head, bool ignore_usage_id)
4360*4882a593Smuzhiyun {
4361*4882a593Smuzhiyun struct kbase_va_region *closest_reg = NULL;
4362*4882a593Smuzhiyun struct kbase_va_region *walker;
4363*4882a593Smuzhiyun size_t current_diff = SIZE_MAX;
4364*4882a593Smuzhiyun
4365*4882a593Smuzhiyun list_for_each_entry(walker, pool_head, jit_node) {
4366*4882a593Smuzhiyun if ((ignore_usage_id ||
4367*4882a593Smuzhiyun walker->jit_usage_id == info->usage_id) &&
4368*4882a593Smuzhiyun walker->jit_bin_id == info->bin_id &&
4369*4882a593Smuzhiyun meet_size_and_tiler_align_top_requirements(walker, info)) {
4370*4882a593Smuzhiyun size_t min_size, max_size, diff;
4371*4882a593Smuzhiyun
4372*4882a593Smuzhiyun /*
4373*4882a593Smuzhiyun * The JIT allocations VA requirements have been met,
4374*4882a593Smuzhiyun * it's suitable but other allocations might be a
4375*4882a593Smuzhiyun * better fit.
4376*4882a593Smuzhiyun */
4377*4882a593Smuzhiyun min_size = min_t(size_t, walker->gpu_alloc->nents,
4378*4882a593Smuzhiyun info->commit_pages);
4379*4882a593Smuzhiyun max_size = max_t(size_t, walker->gpu_alloc->nents,
4380*4882a593Smuzhiyun info->commit_pages);
4381*4882a593Smuzhiyun diff = max_size - min_size;
4382*4882a593Smuzhiyun
4383*4882a593Smuzhiyun if (current_diff > diff) {
4384*4882a593Smuzhiyun current_diff = diff;
4385*4882a593Smuzhiyun closest_reg = walker;
4386*4882a593Smuzhiyun }
4387*4882a593Smuzhiyun
4388*4882a593Smuzhiyun /* The allocation is an exact match */
4389*4882a593Smuzhiyun if (current_diff == 0)
4390*4882a593Smuzhiyun break;
4391*4882a593Smuzhiyun }
4392*4882a593Smuzhiyun }
4393*4882a593Smuzhiyun
4394*4882a593Smuzhiyun return closest_reg;
4395*4882a593Smuzhiyun }
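
/*
 * Sketch (not driver code) of the "closest fit" ranking used in
 * find_reasonable_region() above: candidate regions are compared by the
 * absolute difference between their currently backed pages and the requested
 * commit size, the smallest difference wins, and an exact match (difference
 * of zero) ends the scan early.
 */
static inline size_t example_jit_fit_distance_sketch(size_t backed_pages,
						     size_t requested_commit_pages)
{
	size_t min_size = min_t(size_t, backed_pages, requested_commit_pages);
	size_t max_size = max_t(size_t, backed_pages, requested_commit_pages);

	return max_size - min_size;
}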
4396*4882a593Smuzhiyun
4397*4882a593Smuzhiyun struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
4398*4882a593Smuzhiyun const struct base_jit_alloc_info *info,
4399*4882a593Smuzhiyun bool ignore_pressure_limit)
4400*4882a593Smuzhiyun {
4401*4882a593Smuzhiyun struct kbase_va_region *reg = NULL;
4402*4882a593Smuzhiyun struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
4403*4882a593Smuzhiyun int i;
4404*4882a593Smuzhiyun
4405*4882a593Smuzhiyun /* Calls to this function are inherently synchronous, with respect to
4406*4882a593Smuzhiyun * MMU operations.
4407*4882a593Smuzhiyun */
4408*4882a593Smuzhiyun const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
4409*4882a593Smuzhiyun
4410*4882a593Smuzhiyun #if !MALI_USE_CSF
4411*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4412*4882a593Smuzhiyun #else /* MALI_USE_CSF */
4413*4882a593Smuzhiyun lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4414*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4415*4882a593Smuzhiyun
4416*4882a593Smuzhiyun if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
4417*4882a593Smuzhiyun return NULL;
4418*4882a593Smuzhiyun
4419*4882a593Smuzhiyun if (kctx->kbdev->pagesize_2mb) {
4420*4882a593Smuzhiyun /* Preallocate memory for the sub-allocation structs */
4421*4882a593Smuzhiyun for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
4422*4882a593Smuzhiyun prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
4423*4882a593Smuzhiyun if (!prealloc_sas[i])
4424*4882a593Smuzhiyun goto end;
4425*4882a593Smuzhiyun }
4426*4882a593Smuzhiyun }
4427*4882a593Smuzhiyun
4428*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4429*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4430*4882a593Smuzhiyun
4431*4882a593Smuzhiyun /*
4432*4882a593Smuzhiyun * Scan the pool for an existing allocation which meets our
4433*4882a593Smuzhiyun * requirements and remove it.
4434*4882a593Smuzhiyun */
4435*4882a593Smuzhiyun if (info->usage_id != 0)
4436*4882a593Smuzhiyun /* First scan for an allocation with the same usage ID */
4437*4882a593Smuzhiyun reg = find_reasonable_region(info, &kctx->jit_pool_head, false);
4438*4882a593Smuzhiyun
4439*4882a593Smuzhiyun if (!reg)
4440*4882a593Smuzhiyun /* No allocation with the same usage ID, or usage IDs not in
4441*4882a593Smuzhiyun * use. Search for an allocation we can reuse.
4442*4882a593Smuzhiyun */
4443*4882a593Smuzhiyun reg = find_reasonable_region(info, &kctx->jit_pool_head, true);
4444*4882a593Smuzhiyun
4445*4882a593Smuzhiyun if (reg) {
4446*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4447*4882a593Smuzhiyun size_t needed_pages = 0;
4448*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4449*4882a593Smuzhiyun int ret;
4450*4882a593Smuzhiyun
4451*4882a593Smuzhiyun /*
4452*4882a593Smuzhiyun * Remove the found region from the pool and add it to the
4453*4882a593Smuzhiyun * active list.
4454*4882a593Smuzhiyun */
4455*4882a593Smuzhiyun list_move(&reg->jit_node, &kctx->jit_active_head);
4456*4882a593Smuzhiyun
4457*4882a593Smuzhiyun WARN_ON(reg->gpu_alloc->evicted);
4458*4882a593Smuzhiyun
4459*4882a593Smuzhiyun /*
4460*4882a593Smuzhiyun * Remove the allocation from the eviction list as it's no
4461*4882a593Smuzhiyun * longer eligible for eviction. This must be done before
4462*4882a593Smuzhiyun * dropping the jit_evict_lock
4463*4882a593Smuzhiyun */
4464*4882a593Smuzhiyun list_del_init(&reg->gpu_alloc->evict_node);
4465*4882a593Smuzhiyun
4466*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4467*4882a593Smuzhiyun if (!ignore_pressure_limit) {
4468*4882a593Smuzhiyun if (info->commit_pages > reg->gpu_alloc->nents)
4469*4882a593Smuzhiyun needed_pages = info->commit_pages -
4470*4882a593Smuzhiyun reg->gpu_alloc->nents;
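/* Illustrative example (assumed numbers, not taken from the driver):
 * if the caller requests commit_pages = 64 while the recycled region
 * currently has nents = 16 backed pages, then needed_pages = 48 extra
 * physical pages must be accounted against the pressure limit before
 * kbase_jit_grow() is called below.
 */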
4471*4882a593Smuzhiyun
4472*4882a593Smuzhiyun /* Update early the recycled JIT region's estimate of
4473*4882a593Smuzhiyun * used_pages to ensure it doesn't get trimmed
4474*4882a593Smuzhiyun * undesirably. This is needed as the recycled JIT
4475*4882a593Smuzhiyun * region has been added to the active list but the
4476*4882a593Smuzhiyun * number of used pages for it would be zero, so it
4477*4882a593Smuzhiyun * could get trimmed instead of other allocations only
4478*4882a593Smuzhiyun * to be regrown later resulting in a breach of the JIT
4479*4882a593Smuzhiyun * physical pressure limit.
4480*4882a593Smuzhiyun * That trimming would also disturb the accounting of
4481*4882a593Smuzhiyun * physical pages, i.e. the VM stats, as the number of
4482*4882a593Smuzhiyun * backing pages would have changed when the call to
4483*4882a593Smuzhiyun * kbase_mem_evictable_unmark_reclaim is made.
4484*4882a593Smuzhiyun *
4485*4882a593Smuzhiyun * The second call to update pressure at the end of
4486*4882a593Smuzhiyun * this function would effectively be a nop.
4487*4882a593Smuzhiyun */
4488*4882a593Smuzhiyun kbase_jit_report_update_pressure(
4489*4882a593Smuzhiyun kctx, reg, info->va_pages,
4490*4882a593Smuzhiyun KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
4491*4882a593Smuzhiyun
4492*4882a593Smuzhiyun kbase_jit_request_phys_increase_locked(kctx,
4493*4882a593Smuzhiyun needed_pages);
4494*4882a593Smuzhiyun }
4495*4882a593Smuzhiyun #endif
4496*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4497*4882a593Smuzhiyun
4498*4882a593Smuzhiyun /* kbase_jit_grow() can release & reacquire 'kctx->reg_lock',
4499*4882a593Smuzhiyun * so any state protected by that lock might need to be
4500*4882a593Smuzhiyun * re-evaluated if more code is added here in future.
4501*4882a593Smuzhiyun */
4502*4882a593Smuzhiyun ret = kbase_jit_grow(kctx, info, reg, prealloc_sas,
4503*4882a593Smuzhiyun mmu_sync_info);
4504*4882a593Smuzhiyun
4505*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4506*4882a593Smuzhiyun if (!ignore_pressure_limit)
4507*4882a593Smuzhiyun kbase_jit_done_phys_increase(kctx, needed_pages);
4508*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4509*4882a593Smuzhiyun
4510*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4511*4882a593Smuzhiyun
4512*4882a593Smuzhiyun if (ret < 0) {
4513*4882a593Smuzhiyun /*
4514*4882a593Smuzhiyun * An update to an allocation from the pool failed,
4515*4882a593Smuzhiyun * and chances are slim that a new allocation would
4516*4882a593Smuzhiyun * fare any better, so return the allocation to the
4517*4882a593Smuzhiyun * pool and fail.
4518*4882a593Smuzhiyun */
4519*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4520*4882a593Smuzhiyun "JIT allocation resize failed: va_pages 0x%llx, commit_pages 0x%llx\n",
4521*4882a593Smuzhiyun info->va_pages, info->commit_pages);
4522*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4523*4882a593Smuzhiyun /* Undo the early change made to the recycled JIT
4524*4882a593Smuzhiyun * region's estimate of used_pages.
4525*4882a593Smuzhiyun */
4526*4882a593Smuzhiyun if (!ignore_pressure_limit) {
4527*4882a593Smuzhiyun kbase_jit_report_update_pressure(
4528*4882a593Smuzhiyun kctx, reg, 0,
4529*4882a593Smuzhiyun KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
4530*4882a593Smuzhiyun }
4531*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4532*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4533*4882a593Smuzhiyun list_move(&reg->jit_node, &kctx->jit_pool_head);
4534*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4535*4882a593Smuzhiyun reg = NULL;
4536*4882a593Smuzhiyun goto end;
4537*4882a593Smuzhiyun } else {
4538*4882a593Smuzhiyun /* A suitable JIT allocation existed on the evict list, so we need
4539*4882a593Smuzhiyun * to make sure that the NOT_MOVABLE property is cleared.
4540*4882a593Smuzhiyun */
4541*4882a593Smuzhiyun if (kbase_page_migration_enabled) {
4542*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4543*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4544*4882a593Smuzhiyun kbase_set_phy_alloc_page_status(reg->gpu_alloc, ALLOCATED_MAPPED);
4545*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4546*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4547*4882a593Smuzhiyun }
4548*4882a593Smuzhiyun }
4549*4882a593Smuzhiyun } else {
4550*4882a593Smuzhiyun /* No suitable JIT allocation was found so create a new one */
4551*4882a593Smuzhiyun u64 flags = BASE_MEM_PROT_CPU_RD | BASE_MEM_PROT_GPU_RD |
4552*4882a593Smuzhiyun BASE_MEM_PROT_GPU_WR | BASE_MEM_GROW_ON_GPF |
4553*4882a593Smuzhiyun BASE_MEM_COHERENT_LOCAL |
4554*4882a593Smuzhiyun BASEP_MEM_NO_USER_FREE;
4555*4882a593Smuzhiyun u64 gpu_addr;
4556*4882a593Smuzhiyun
4557*4882a593Smuzhiyun #if !MALI_USE_CSF
4558*4882a593Smuzhiyun if (info->flags & BASE_JIT_ALLOC_MEM_TILER_ALIGN_TOP)
4559*4882a593Smuzhiyun flags |= BASE_MEM_TILER_ALIGN_TOP;
4560*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4561*4882a593Smuzhiyun
4562*4882a593Smuzhiyun flags |= kbase_mem_group_id_set(kctx->jit_group_id);
4563*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4564*4882a593Smuzhiyun if (!ignore_pressure_limit) {
4565*4882a593Smuzhiyun flags |= BASEP_MEM_PERFORM_JIT_TRIM;
4566*4882a593Smuzhiyun /* The corresponding call to 'done_phys_increase' would
4567*4882a593Smuzhiyun * be made inside the kbase_mem_alloc().
4568*4882a593Smuzhiyun */
4569*4882a593Smuzhiyun kbase_jit_request_phys_increase_locked(
4570*4882a593Smuzhiyun kctx, info->commit_pages);
4571*4882a593Smuzhiyun }
4572*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4573*4882a593Smuzhiyun
4574*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4575*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4576*4882a593Smuzhiyun
4577*4882a593Smuzhiyun reg = kbase_mem_alloc(kctx, info->va_pages, info->commit_pages, info->extension,
4578*4882a593Smuzhiyun &flags, &gpu_addr, mmu_sync_info);
4579*4882a593Smuzhiyun if (!reg) {
4580*4882a593Smuzhiyun /* Most likely not enough GPU virtual space left for
4581*4882a593Smuzhiyun * the new JIT allocation.
4582*4882a593Smuzhiyun */
4583*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
4584*4882a593Smuzhiyun "Failed to allocate JIT memory: va_pages 0x%llx, commit_pages 0x%llx\n",
4585*4882a593Smuzhiyun info->va_pages, info->commit_pages);
4586*4882a593Smuzhiyun goto end;
4587*4882a593Smuzhiyun }
4588*4882a593Smuzhiyun
4589*4882a593Smuzhiyun if (!ignore_pressure_limit) {
4590*4882a593Smuzhiyun /* Because the pressure limit is enforced, kbase_mem_alloc
4591*4882a593Smuzhiyun * was instructed to perform the trimming, which in turn
4592*4882a593Smuzhiyun * ensures that the new JIT allocation is already in the
4593*4882a593Smuzhiyun * jit_active_head list, so there is nothing to
4594*4882a593Smuzhiyun * do here.
4595*4882a593Smuzhiyun */
4596*4882a593Smuzhiyun WARN_ON(list_empty(&reg->jit_node));
4597*4882a593Smuzhiyun } else {
4598*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4599*4882a593Smuzhiyun list_add(&reg->jit_node, &kctx->jit_active_head);
4600*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4601*4882a593Smuzhiyun }
4602*4882a593Smuzhiyun }
4603*4882a593Smuzhiyun
4604*4882a593Smuzhiyun /* Similarly to tiler heap init, there is a short window of time
4605*4882a593Smuzhiyun * where the (either recycled or newly allocated, in our case) region has
4606*4882a593Smuzhiyun * "no user free" count incremented but is still missing the DONT_NEED flag, and
4607*4882a593Smuzhiyun * doesn't yet have the ACTIVE_JIT_ALLOC flag either. Temporarily leaking the
4608*4882a593Smuzhiyun * allocation is the least bad option that doesn't lead to a security issue down the
4609*4882a593Smuzhiyun * line (it will eventually be cleaned up during context termination).
4610*4882a593Smuzhiyun *
4611*4882a593Smuzhiyun * We also need to call kbase_gpu_vm_lock regardless, as we're updating the region
4612*4882a593Smuzhiyun * flags.
4613*4882a593Smuzhiyun */
4614*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4615*4882a593Smuzhiyun if (unlikely(atomic_read(&reg->no_user_free_count) > 1)) {
4616*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4617*4882a593Smuzhiyun dev_err(kctx->kbdev->dev, "JIT region has no_user_free_count > 1!\n");
4618*4882a593Smuzhiyun
4619*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4620*4882a593Smuzhiyun list_move(&reg->jit_node, &kctx->jit_pool_head);
4621*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4622*4882a593Smuzhiyun
4623*4882a593Smuzhiyun reg = NULL;
4624*4882a593Smuzhiyun goto end;
4625*4882a593Smuzhiyun }
4626*4882a593Smuzhiyun
4627*4882a593Smuzhiyun trace_mali_jit_alloc(reg, info->id);
4628*4882a593Smuzhiyun
4629*4882a593Smuzhiyun kctx->jit_current_allocations++;
4630*4882a593Smuzhiyun kctx->jit_current_allocations_per_bin[info->bin_id]++;
4631*4882a593Smuzhiyun
4632*4882a593Smuzhiyun trace_jit_stats(kctx, info->bin_id, info->max_allocations);
4633*4882a593Smuzhiyun
4634*4882a593Smuzhiyun reg->jit_usage_id = info->usage_id;
4635*4882a593Smuzhiyun reg->jit_bin_id = info->bin_id;
4636*4882a593Smuzhiyun reg->flags |= KBASE_REG_ACTIVE_JIT_ALLOC;
4637*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4638*4882a593Smuzhiyun if (info->flags & BASE_JIT_ALLOC_HEAP_INFO_IS_SIZE)
4639*4882a593Smuzhiyun reg->flags = reg->flags | KBASE_REG_HEAP_INFO_IS_SIZE;
4640*4882a593Smuzhiyun reg->heap_info_gpu_addr = info->heap_info_gpu_addr;
4641*4882a593Smuzhiyun kbase_jit_report_update_pressure(kctx, reg, info->va_pages,
4642*4882a593Smuzhiyun KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
4643*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4644*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4645*4882a593Smuzhiyun
4646*4882a593Smuzhiyun end:
4647*4882a593Smuzhiyun for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
4648*4882a593Smuzhiyun kfree(prealloc_sas[i]);
4649*4882a593Smuzhiyun
4650*4882a593Smuzhiyun return reg;
4651*4882a593Smuzhiyun }
4652*4882a593Smuzhiyun
4653*4882a593Smuzhiyun void kbase_jit_free(struct kbase_context *kctx, struct kbase_va_region *reg)
4654*4882a593Smuzhiyun {
4655*4882a593Smuzhiyun u64 old_pages;
4656*4882a593Smuzhiyun
4657*4882a593Smuzhiyun #if !MALI_USE_CSF
4658*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4659*4882a593Smuzhiyun #else /* MALI_USE_CSF */
4660*4882a593Smuzhiyun lockdep_assert_held(&kctx->csf.kcpu_queues.jit_lock);
4661*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4662*4882a593Smuzhiyun
4663*4882a593Smuzhiyun /* JIT id not immediately available here, so use 0u */
4664*4882a593Smuzhiyun trace_mali_jit_free(reg, 0u);
4665*4882a593Smuzhiyun
4666*4882a593Smuzhiyun /* Get current size of JIT region */
4667*4882a593Smuzhiyun old_pages = kbase_reg_current_backed_size(reg);
4668*4882a593Smuzhiyun if (reg->initial_commit < old_pages) {
4669*4882a593Smuzhiyun /* Free trim_level % of region, but don't go below initial
4670*4882a593Smuzhiyun * commit size
4671*4882a593Smuzhiyun */
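/* Worked example with illustrative values (not from the driver): with
 * old_pages = 512, trim_level = 25 and initial_commit = 64, new_size =
 * max(64, 512 * 75 / 100) = 384 and delta = 128, so kbase_mem_shrink()
 * below releases 128 pages.
 */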
4672*4882a593Smuzhiyun u64 new_size = MAX(reg->initial_commit,
4673*4882a593Smuzhiyun div_u64(old_pages * (100 - kctx->trim_level), 100));
4674*4882a593Smuzhiyun u64 delta = old_pages - new_size;
4675*4882a593Smuzhiyun
4676*4882a593Smuzhiyun if (delta) {
4677*4882a593Smuzhiyun mutex_lock(&kctx->reg_lock);
4678*4882a593Smuzhiyun kbase_mem_shrink(kctx, reg, old_pages - delta);
4679*4882a593Smuzhiyun mutex_unlock(&kctx->reg_lock);
4680*4882a593Smuzhiyun }
4681*4882a593Smuzhiyun }
4682*4882a593Smuzhiyun
4683*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4684*4882a593Smuzhiyun reg->heap_info_gpu_addr = 0;
4685*4882a593Smuzhiyun kbase_jit_report_update_pressure(kctx, reg, 0,
4686*4882a593Smuzhiyun KBASE_JIT_REPORT_ON_ALLOC_OR_FREE);
4687*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4688*4882a593Smuzhiyun
4689*4882a593Smuzhiyun kctx->jit_current_allocations--;
4690*4882a593Smuzhiyun kctx->jit_current_allocations_per_bin[reg->jit_bin_id]--;
4691*4882a593Smuzhiyun
4692*4882a593Smuzhiyun trace_jit_stats(kctx, reg->jit_bin_id, UINT_MAX);
4693*4882a593Smuzhiyun
4694*4882a593Smuzhiyun kbase_mem_evictable_mark_reclaim(reg->gpu_alloc);
4695*4882a593Smuzhiyun
4696*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4697*4882a593Smuzhiyun reg->flags |= KBASE_REG_DONT_NEED;
4698*4882a593Smuzhiyun reg->flags &= ~KBASE_REG_ACTIVE_JIT_ALLOC;
4699*4882a593Smuzhiyun kbase_mem_shrink_cpu_mapping(kctx, reg, 0, reg->gpu_alloc->nents);
4700*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4701*4882a593Smuzhiyun
4702*4882a593Smuzhiyun /*
4703*4882a593Smuzhiyun * Add the allocation to the eviction list and the JIT pool; after this
4704*4882a593Smuzhiyun * point the shrinker can reclaim it, or it may be reused.
4705*4882a593Smuzhiyun */
4706*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4707*4882a593Smuzhiyun
4708*4882a593Smuzhiyun /* This allocation can't already be on a list. */
4709*4882a593Smuzhiyun WARN_ON(!list_empty(&reg->gpu_alloc->evict_node));
4710*4882a593Smuzhiyun list_add(&reg->gpu_alloc->evict_node, &kctx->evict_list);
4711*4882a593Smuzhiyun atomic_add(reg->gpu_alloc->nents, &kctx->evict_nents);
4712*4882a593Smuzhiyun
4713*4882a593Smuzhiyun list_move(&reg->jit_node, &kctx->jit_pool_head);
4714*4882a593Smuzhiyun
4715*4882a593Smuzhiyun /* Inactive JIT regions should be freed by the shrinker and not impacted
4716*4882a593Smuzhiyun * by page migration. Once freed, they will enter into the page migration
4717*4882a593Smuzhiyun * state machine via the mempools.
4718*4882a593Smuzhiyun */
4719*4882a593Smuzhiyun if (kbase_page_migration_enabled)
4720*4882a593Smuzhiyun kbase_set_phy_alloc_page_status(reg->gpu_alloc, NOT_MOVABLE);
4721*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4722*4882a593Smuzhiyun }
4723*4882a593Smuzhiyun
4724*4882a593Smuzhiyun void kbase_jit_backing_lost(struct kbase_va_region *reg)
4725*4882a593Smuzhiyun {
4726*4882a593Smuzhiyun struct kbase_context *kctx = kbase_reg_flags_to_kctx(reg);
4727*4882a593Smuzhiyun
4728*4882a593Smuzhiyun if (WARN_ON(!kctx))
4729*4882a593Smuzhiyun return;
4730*4882a593Smuzhiyun
4731*4882a593Smuzhiyun lockdep_assert_held(&kctx->jit_evict_lock);
4732*4882a593Smuzhiyun
4733*4882a593Smuzhiyun /*
4734*4882a593Smuzhiyun * JIT allocations will always be on a list; if the region
4735*4882a593Smuzhiyun * is not on a list then it's not a JIT allocation.
4736*4882a593Smuzhiyun */
4737*4882a593Smuzhiyun if (list_empty(®->jit_node))
4738*4882a593Smuzhiyun return;
4739*4882a593Smuzhiyun
4740*4882a593Smuzhiyun /*
4741*4882a593Smuzhiyun * Freeing the allocation requires locks we might not be able
4742*4882a593Smuzhiyun * to take now, so move the allocation to the free list and kick
4743*4882a593Smuzhiyun * the worker which will do the freeing.
4744*4882a593Smuzhiyun */
4745*4882a593Smuzhiyun list_move(&reg->jit_node, &kctx->jit_destroy_head);
4746*4882a593Smuzhiyun
4747*4882a593Smuzhiyun schedule_work(&kctx->jit_work);
4748*4882a593Smuzhiyun }
4749*4882a593Smuzhiyun
4750*4882a593Smuzhiyun bool kbase_jit_evict(struct kbase_context *kctx)
4751*4882a593Smuzhiyun {
4752*4882a593Smuzhiyun struct kbase_va_region *reg = NULL;
4753*4882a593Smuzhiyun
4754*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
4755*4882a593Smuzhiyun
4756*4882a593Smuzhiyun /* Free the oldest allocation from the pool */
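/* Note: kbase_jit_free() moves recycled regions to the head of
 * jit_pool_head, so the tail entry (jit_pool_head.prev) taken below is
 * the least recently freed, i.e. the oldest, allocation.
 */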
4757*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4758*4882a593Smuzhiyun if (!list_empty(&kctx->jit_pool_head)) {
4759*4882a593Smuzhiyun reg = list_entry(kctx->jit_pool_head.prev,
4760*4882a593Smuzhiyun struct kbase_va_region, jit_node);
4761*4882a593Smuzhiyun list_del(&reg->jit_node);
4762*4882a593Smuzhiyun list_del_init(&reg->gpu_alloc->evict_node);
4763*4882a593Smuzhiyun }
4764*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4765*4882a593Smuzhiyun
4766*4882a593Smuzhiyun if (reg) {
4767*4882a593Smuzhiyun /*
4768*4882a593Smuzhiyun * Incrementing the refcount is prevented on JIT regions.
4769*4882a593Smuzhiyun * If/when this ever changes we would need to compensate
4770*4882a593Smuzhiyun * by implementing "free on putting the last reference",
4771*4882a593Smuzhiyun * but only for JIT regions.
4772*4882a593Smuzhiyun */
4773*4882a593Smuzhiyun WARN_ON(atomic_read(&reg->no_user_free_count) > 1);
4774*4882a593Smuzhiyun kbase_va_region_no_user_free_dec(reg);
4775*4882a593Smuzhiyun kbase_mem_free_region(kctx, reg);
4776*4882a593Smuzhiyun }
4777*4882a593Smuzhiyun
4778*4882a593Smuzhiyun return (reg != NULL);
4779*4882a593Smuzhiyun }
4780*4882a593Smuzhiyun
4781*4882a593Smuzhiyun void kbase_jit_term(struct kbase_context *kctx)
4782*4882a593Smuzhiyun {
4783*4882a593Smuzhiyun struct kbase_va_region *walker;
4784*4882a593Smuzhiyun
4785*4882a593Smuzhiyun /* Free all allocations for this context */
4786*4882a593Smuzhiyun
4787*4882a593Smuzhiyun kbase_gpu_vm_lock(kctx);
4788*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4789*4882a593Smuzhiyun /* Free all allocations from the pool */
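/* Note: jit_evict_lock is released around each kbase_mem_free_region()
 * call below and re-acquired before the list head is examined again, so
 * list_empty() is always re-checked under the lock.
 */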
4790*4882a593Smuzhiyun while (!list_empty(&kctx->jit_pool_head)) {
4791*4882a593Smuzhiyun walker = list_first_entry(&kctx->jit_pool_head,
4792*4882a593Smuzhiyun struct kbase_va_region, jit_node);
4793*4882a593Smuzhiyun list_del(&walker->jit_node);
4794*4882a593Smuzhiyun list_del_init(&walker->gpu_alloc->evict_node);
4795*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4796*4882a593Smuzhiyun /*
4797*4882a593Smuzhiyun * Incrementing the refcount is prevented on JIT regions.
4798*4882a593Smuzhiyun * If/when this ever changes we would need to compensate
4799*4882a593Smuzhiyun * by implementing "free on putting the last reference",
4800*4882a593Smuzhiyun * but only for JIT regions.
4801*4882a593Smuzhiyun */
4802*4882a593Smuzhiyun WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
4803*4882a593Smuzhiyun kbase_va_region_no_user_free_dec(walker);
4804*4882a593Smuzhiyun kbase_mem_free_region(kctx, walker);
4805*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4806*4882a593Smuzhiyun }
4807*4882a593Smuzhiyun
4808*4882a593Smuzhiyun /* Free all allocations from active list */
4809*4882a593Smuzhiyun while (!list_empty(&kctx->jit_active_head)) {
4810*4882a593Smuzhiyun walker = list_first_entry(&kctx->jit_active_head,
4811*4882a593Smuzhiyun struct kbase_va_region, jit_node);
4812*4882a593Smuzhiyun list_del(&walker->jit_node);
4813*4882a593Smuzhiyun list_del_init(&walker->gpu_alloc->evict_node);
4814*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4815*4882a593Smuzhiyun /*
4816*4882a593Smuzhiyun * Incrementing the refcount is prevented on JIT regions.
4817*4882a593Smuzhiyun * If/when this ever changes we would need to compensate
4818*4882a593Smuzhiyun * by implementing "free on putting the last reference",
4819*4882a593Smuzhiyun * but only for JIT regions.
4820*4882a593Smuzhiyun */
4821*4882a593Smuzhiyun WARN_ON(atomic_read(&walker->no_user_free_count) > 1);
4822*4882a593Smuzhiyun kbase_va_region_no_user_free_dec(walker);
4823*4882a593Smuzhiyun kbase_mem_free_region(kctx, walker);
4824*4882a593Smuzhiyun mutex_lock(&kctx->jit_evict_lock);
4825*4882a593Smuzhiyun }
4826*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4827*4882a593Smuzhiyun WARN_ON(kctx->jit_phys_pages_to_be_allocated);
4828*4882a593Smuzhiyun #endif
4829*4882a593Smuzhiyun mutex_unlock(&kctx->jit_evict_lock);
4830*4882a593Smuzhiyun kbase_gpu_vm_unlock(kctx);
4831*4882a593Smuzhiyun
4832*4882a593Smuzhiyun /*
4833*4882a593Smuzhiyun * Flush the freeing of allocations whose backing has been freed
4834*4882a593Smuzhiyun * (i.e. everything in jit_destroy_head).
4835*4882a593Smuzhiyun */
4836*4882a593Smuzhiyun cancel_work_sync(&kctx->jit_work);
4837*4882a593Smuzhiyun }
4838*4882a593Smuzhiyun
4839*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4840*4882a593Smuzhiyun void kbase_trace_jit_report_gpu_mem_trace_enabled(struct kbase_context *kctx,
4841*4882a593Smuzhiyun struct kbase_va_region *reg, unsigned int flags)
4842*4882a593Smuzhiyun {
4843*4882a593Smuzhiyun /* Offset to the location used for a JIT report within the GPU memory
4844*4882a593Smuzhiyun *
4845*4882a593Smuzhiyun * This constant is only used by this debugging function; it is not
4846*4882a593Smuzhiyun * useful anywhere else in kbase
4847*4882a593Smuzhiyun */
4848*4882a593Smuzhiyun const u64 jit_report_gpu_mem_offset = sizeof(u64)*2;
4849*4882a593Smuzhiyun
4850*4882a593Smuzhiyun u64 addr_start;
4851*4882a593Smuzhiyun struct kbase_vmap_struct mapping;
4852*4882a593Smuzhiyun u64 *ptr;
4853*4882a593Smuzhiyun
4854*4882a593Smuzhiyun if (reg->heap_info_gpu_addr == 0ull)
4855*4882a593Smuzhiyun goto out;
4856*4882a593Smuzhiyun
4857*4882a593Smuzhiyun /* Nothing else to trace in the case the memory just contains the
4858*4882a593Smuzhiyun * size. Other tracepoints already record the relevant area of memory.
4859*4882a593Smuzhiyun */
4860*4882a593Smuzhiyun if (reg->flags & KBASE_REG_HEAP_INFO_IS_SIZE)
4861*4882a593Smuzhiyun goto out;
4862*4882a593Smuzhiyun
4863*4882a593Smuzhiyun addr_start = reg->heap_info_gpu_addr - jit_report_gpu_mem_offset;
4864*4882a593Smuzhiyun
4865*4882a593Smuzhiyun ptr = kbase_vmap_prot(kctx, addr_start, KBASE_JIT_REPORT_GPU_MEM_SIZE,
4866*4882a593Smuzhiyun KBASE_REG_CPU_RD, &mapping);
4867*4882a593Smuzhiyun if (!ptr) {
4868*4882a593Smuzhiyun dev_warn(kctx->kbdev->dev,
4869*4882a593Smuzhiyun "%s: JIT start=0x%llx unable to map memory near end pointer %llx\n",
4870*4882a593Smuzhiyun __func__, reg->start_pfn << PAGE_SHIFT,
4871*4882a593Smuzhiyun addr_start);
4872*4882a593Smuzhiyun goto out;
4873*4882a593Smuzhiyun }
4874*4882a593Smuzhiyun
4875*4882a593Smuzhiyun trace_mali_jit_report_gpu_mem(addr_start, reg->start_pfn << PAGE_SHIFT,
4876*4882a593Smuzhiyun ptr, flags);
4877*4882a593Smuzhiyun
4878*4882a593Smuzhiyun kbase_vunmap(kctx, &mapping);
4879*4882a593Smuzhiyun out:
4880*4882a593Smuzhiyun return;
4881*4882a593Smuzhiyun }
4882*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4883*4882a593Smuzhiyun
4884*4882a593Smuzhiyun #if MALI_JIT_PRESSURE_LIMIT_BASE
4885*4882a593Smuzhiyun void kbase_jit_report_update_pressure(struct kbase_context *kctx,
4886*4882a593Smuzhiyun struct kbase_va_region *reg, u64 new_used_pages,
4887*4882a593Smuzhiyun unsigned int flags)
4888*4882a593Smuzhiyun {
4889*4882a593Smuzhiyun u64 diff;
4890*4882a593Smuzhiyun
4891*4882a593Smuzhiyun #if !MALI_USE_CSF
4892*4882a593Smuzhiyun lockdep_assert_held(&kctx->jctx.lock);
4893*4882a593Smuzhiyun #endif /* !MALI_USE_CSF */
4894*4882a593Smuzhiyun
4895*4882a593Smuzhiyun trace_mali_jit_report_pressure(reg, new_used_pages,
4896*4882a593Smuzhiyun kctx->jit_current_phys_pressure + new_used_pages -
4897*4882a593Smuzhiyun reg->used_pages,
4898*4882a593Smuzhiyun flags);
4899*4882a593Smuzhiyun
4900*4882a593Smuzhiyun if (WARN_ON(new_used_pages > reg->nr_pages))
4901*4882a593Smuzhiyun return;
4902*4882a593Smuzhiyun
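/* Illustrative example (assumed values): with jit_current_phys_pressure
 * = 100 pages, reg->used_pages = 30 and new_used_pages = 10, the first
 * branch below subtracts diff = 20 from the context-wide pressure and
 * records 10 as the region's used_pages.
 */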
4903*4882a593Smuzhiyun if (reg->used_pages > new_used_pages) {
4904*4882a593Smuzhiyun /* We reduced the number of used pages */
4905*4882a593Smuzhiyun diff = reg->used_pages - new_used_pages;
4906*4882a593Smuzhiyun
4907*4882a593Smuzhiyun if (!WARN_ON(diff > kctx->jit_current_phys_pressure))
4908*4882a593Smuzhiyun kctx->jit_current_phys_pressure -= diff;
4909*4882a593Smuzhiyun
4910*4882a593Smuzhiyun reg->used_pages = new_used_pages;
4911*4882a593Smuzhiyun } else {
4912*4882a593Smuzhiyun /* We increased the number of used pages */
4913*4882a593Smuzhiyun diff = new_used_pages - reg->used_pages;
4914*4882a593Smuzhiyun
4915*4882a593Smuzhiyun if (!WARN_ON(diff > U64_MAX - kctx->jit_current_phys_pressure))
4916*4882a593Smuzhiyun kctx->jit_current_phys_pressure += diff;
4917*4882a593Smuzhiyun
4918*4882a593Smuzhiyun reg->used_pages = new_used_pages;
4919*4882a593Smuzhiyun }
4920*4882a593Smuzhiyun
4921*4882a593Smuzhiyun }
4922*4882a593Smuzhiyun #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
4923*4882a593Smuzhiyun
4924*4882a593Smuzhiyun void kbase_unpin_user_buf_page(struct page *page)
4925*4882a593Smuzhiyun {
4926*4882a593Smuzhiyun #if KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
4927*4882a593Smuzhiyun put_page(page);
4928*4882a593Smuzhiyun #else
4929*4882a593Smuzhiyun unpin_user_page(page);
4930*4882a593Smuzhiyun #endif
4931*4882a593Smuzhiyun }
4932*4882a593Smuzhiyun
4933*4882a593Smuzhiyun #if MALI_USE_CSF
4934*4882a593Smuzhiyun static void kbase_jd_user_buf_unpin_pages(struct kbase_mem_phy_alloc *alloc)
4935*4882a593Smuzhiyun {
4936*4882a593Smuzhiyun /* In CSF builds, we keep pages pinned until the last reference is
4937*4882a593Smuzhiyun * released on the alloc. A refcount of 0 also means we can be sure
4938*4882a593Smuzhiyun * that all CPU mappings have been closed on this alloc, and no more
4939*4882a593Smuzhiyun * mappings of it will be created.
4940*4882a593Smuzhiyun *
4941*4882a593Smuzhiyun * Further, the WARN() below captures the restriction that this
4942*4882a593Smuzhiyun * function will not handle anything other than the alloc termination
4943*4882a593Smuzhiyun * path, because the caller of kbase_mem_phy_alloc_put() is not
4944*4882a593Smuzhiyun * required to hold the kctx's reg_lock, and so we could not handle
4945*4882a593Smuzhiyun * removing an existing CPU mapping here.
4946*4882a593Smuzhiyun *
4947*4882a593Smuzhiyun * Refer to this function's kernel-doc comments for alternatives for
4948*4882a593Smuzhiyun * unpinning a User buffer.
4949*4882a593Smuzhiyun */
4950*4882a593Smuzhiyun
4951*4882a593Smuzhiyun if (alloc->nents && !WARN(kref_read(&alloc->kref) != 0,
4952*4882a593Smuzhiyun "must only be called on terminating an allocation")) {
4953*4882a593Smuzhiyun struct page **pages = alloc->imported.user_buf.pages;
4954*4882a593Smuzhiyun long i;
4955*4882a593Smuzhiyun
4956*4882a593Smuzhiyun WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages);
4957*4882a593Smuzhiyun
4958*4882a593Smuzhiyun for (i = 0; i < alloc->nents; i++)
4959*4882a593Smuzhiyun kbase_unpin_user_buf_page(pages[i]);
4960*4882a593Smuzhiyun
4961*4882a593Smuzhiyun alloc->nents = 0;
4962*4882a593Smuzhiyun }
4963*4882a593Smuzhiyun }
4964*4882a593Smuzhiyun #endif
4965*4882a593Smuzhiyun
4966*4882a593Smuzhiyun int kbase_jd_user_buf_pin_pages(struct kbase_context *kctx,
4967*4882a593Smuzhiyun struct kbase_va_region *reg)
4968*4882a593Smuzhiyun {
4969*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
4970*4882a593Smuzhiyun struct page **pages = alloc->imported.user_buf.pages;
4971*4882a593Smuzhiyun unsigned long address = alloc->imported.user_buf.address;
4972*4882a593Smuzhiyun struct mm_struct *mm = alloc->imported.user_buf.mm;
4973*4882a593Smuzhiyun long pinned_pages;
4974*4882a593Smuzhiyun long i;
4975*4882a593Smuzhiyun int write;
4976*4882a593Smuzhiyun
4977*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
4978*4882a593Smuzhiyun
4979*4882a593Smuzhiyun if (WARN_ON(alloc->type != KBASE_MEM_TYPE_IMPORTED_USER_BUF))
4980*4882a593Smuzhiyun return -EINVAL;
4981*4882a593Smuzhiyun
4982*4882a593Smuzhiyun if (alloc->nents) {
4983*4882a593Smuzhiyun if (WARN_ON(alloc->nents != alloc->imported.user_buf.nr_pages))
4984*4882a593Smuzhiyun return -EINVAL;
4985*4882a593Smuzhiyun else
4986*4882a593Smuzhiyun return 0;
4987*4882a593Smuzhiyun }
4988*4882a593Smuzhiyun
4989*4882a593Smuzhiyun if (WARN_ON(reg->gpu_alloc->imported.user_buf.mm != current->mm))
4990*4882a593Smuzhiyun return -EINVAL;
4991*4882a593Smuzhiyun
4992*4882a593Smuzhiyun write = reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR);
4993*4882a593Smuzhiyun
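/* Pin the whole user buffer with the API appropriate for the running
 * kernel: get_user_pages_remote() on kernels older than 5.9,
 * pin_user_pages_remote() from 5.9 onwards. FOLL_WRITE is requested
 * only when the region is CPU- or GPU-writable.
 */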
4994*4882a593Smuzhiyun #if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
4995*4882a593Smuzhiyun pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
4996*4882a593Smuzhiyun write ? FOLL_WRITE : 0, pages, NULL);
4997*4882a593Smuzhiyun #elif KERNEL_VERSION(5, 9, 0) > LINUX_VERSION_CODE
4998*4882a593Smuzhiyun pinned_pages = get_user_pages_remote(NULL, mm, address, alloc->imported.user_buf.nr_pages,
4999*4882a593Smuzhiyun write ? FOLL_WRITE : 0, pages, NULL, NULL);
5000*4882a593Smuzhiyun #else
5001*4882a593Smuzhiyun pinned_pages = pin_user_pages_remote(mm, address, alloc->imported.user_buf.nr_pages,
5002*4882a593Smuzhiyun write ? FOLL_WRITE : 0, pages, NULL, NULL);
5003*4882a593Smuzhiyun #endif
5004*4882a593Smuzhiyun
5005*4882a593Smuzhiyun if (pinned_pages <= 0)
5006*4882a593Smuzhiyun return pinned_pages;
5007*4882a593Smuzhiyun
5008*4882a593Smuzhiyun if (pinned_pages != alloc->imported.user_buf.nr_pages) {
5009*4882a593Smuzhiyun /* The code above already ensures there cannot have been a CPU
5010*4882a593Smuzhiyun * mapping, by ensuring alloc->nents is 0
5011*4882a593Smuzhiyun */
5012*4882a593Smuzhiyun for (i = 0; i < pinned_pages; i++)
5013*4882a593Smuzhiyun kbase_unpin_user_buf_page(pages[i]);
5014*4882a593Smuzhiyun return -ENOMEM;
5015*4882a593Smuzhiyun }
5016*4882a593Smuzhiyun
5017*4882a593Smuzhiyun alloc->nents = pinned_pages;
5018*4882a593Smuzhiyun
5019*4882a593Smuzhiyun return 0;
5020*4882a593Smuzhiyun }
5021*4882a593Smuzhiyun
5022*4882a593Smuzhiyun static int kbase_jd_user_buf_map(struct kbase_context *kctx,
5023*4882a593Smuzhiyun struct kbase_va_region *reg)
5024*4882a593Smuzhiyun {
5025*4882a593Smuzhiyun int err;
5026*4882a593Smuzhiyun long pinned_pages = 0;
5027*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc;
5028*4882a593Smuzhiyun struct page **pages;
5029*4882a593Smuzhiyun struct tagged_addr *pa;
5030*4882a593Smuzhiyun long i, dma_mapped_pages;
5031*4882a593Smuzhiyun struct device *dev;
5032*4882a593Smuzhiyun unsigned long gwt_mask = ~0;
5033*4882a593Smuzhiyun /* Calls to this function are inherently asynchronous, with respect to
5034*4882a593Smuzhiyun * MMU operations.
5035*4882a593Smuzhiyun */
5036*4882a593Smuzhiyun const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
5037*4882a593Smuzhiyun
5038*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5039*4882a593Smuzhiyun
5040*4882a593Smuzhiyun err = kbase_jd_user_buf_pin_pages(kctx, reg);
5041*4882a593Smuzhiyun
5042*4882a593Smuzhiyun if (err)
5043*4882a593Smuzhiyun return err;
5044*4882a593Smuzhiyun
5045*4882a593Smuzhiyun alloc = reg->gpu_alloc;
5046*4882a593Smuzhiyun pa = kbase_get_gpu_phy_pages(reg);
5047*4882a593Smuzhiyun pinned_pages = alloc->nents;
5048*4882a593Smuzhiyun pages = alloc->imported.user_buf.pages;
5049*4882a593Smuzhiyun dev = kctx->kbdev->dev;
5050*4882a593Smuzhiyun
5051*4882a593Smuzhiyun /* Manual CPU cache synchronization.
5052*4882a593Smuzhiyun *
5053*4882a593Smuzhiyun * The driver disables automatic CPU cache synchronization because the
5054*4882a593Smuzhiyun * memory pages that enclose the imported region may also contain
5055*4882a593Smuzhiyun * sub-regions which are not imported and that are allocated and used
5056*4882a593Smuzhiyun * by the user process. This may be the case for memory at the beginning
5057*4882a593Smuzhiyun * of the first page and at the end of the last page. Automatic CPU cache
5058*4882a593Smuzhiyun * synchronization would force some operations on those memory allocations,
5059*4882a593Smuzhiyun * unbeknown to the user process: in particular, a CPU cache invalidate
5060*4882a593Smuzhiyun * upon unmapping would destroy the content of dirty CPU caches and cause
5061*4882a593Smuzhiyun * the user process to lose CPU writes to the non-imported sub-regions.
5062*4882a593Smuzhiyun *
5063*4882a593Smuzhiyun * When the GPU claims ownership of the imported memory buffer, it shall
5064*4882a593Smuzhiyun * commit CPU writes for the whole of all pages that enclose the imported
5065*4882a593Smuzhiyun * region, otherwise the initial content of memory would be wrong.
5066*4882a593Smuzhiyun */
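/* Illustrative example (assumed layout): if the imported buffer starts
 * 0x100 bytes into its first page, bytes 0x0-0xFF of that page may hold
 * unrelated user data. The loop below therefore skips the automatic
 * sync where the kernel supports it (DMA_ATTR_SKIP_CPU_SYNC) and issues
 * an explicit dma_sync_single_for_device() for the whole page, so that
 * CPU writes to both the imported and non-imported parts are committed
 * before the GPU takes ownership.
 */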
5067*4882a593Smuzhiyun for (i = 0; i < pinned_pages; i++) {
5068*4882a593Smuzhiyun dma_addr_t dma_addr;
5069*4882a593Smuzhiyun #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5070*4882a593Smuzhiyun dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
5071*4882a593Smuzhiyun #else
5072*4882a593Smuzhiyun dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
5073*4882a593Smuzhiyun DMA_ATTR_SKIP_CPU_SYNC);
5074*4882a593Smuzhiyun #endif
5075*4882a593Smuzhiyun err = dma_mapping_error(dev, dma_addr);
5076*4882a593Smuzhiyun if (err)
5077*4882a593Smuzhiyun goto unwind;
5078*4882a593Smuzhiyun
5079*4882a593Smuzhiyun alloc->imported.user_buf.dma_addrs[i] = dma_addr;
5080*4882a593Smuzhiyun pa[i] = as_tagged(page_to_phys(pages[i]));
5081*4882a593Smuzhiyun
5082*4882a593Smuzhiyun dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
5083*4882a593Smuzhiyun }
5084*4882a593Smuzhiyun
5085*4882a593Smuzhiyun #ifdef CONFIG_MALI_CINSTR_GWT
5086*4882a593Smuzhiyun if (kctx->gwt_enabled)
5087*4882a593Smuzhiyun gwt_mask = ~KBASE_REG_GPU_WR;
5088*4882a593Smuzhiyun #endif
5089*4882a593Smuzhiyun
5090*4882a593Smuzhiyun err = kbase_mmu_insert_imported_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn, pa,
5091*4882a593Smuzhiyun kbase_reg_current_backed_size(reg),
5092*4882a593Smuzhiyun reg->flags & gwt_mask, kctx->as_nr, alloc->group_id,
5093*4882a593Smuzhiyun mmu_sync_info, NULL);
5094*4882a593Smuzhiyun if (err == 0)
5095*4882a593Smuzhiyun return 0;
5096*4882a593Smuzhiyun
5097*4882a593Smuzhiyun /* fall through to the unwind path */
5098*4882a593Smuzhiyun unwind:
5099*4882a593Smuzhiyun alloc->nents = 0;
5100*4882a593Smuzhiyun dma_mapped_pages = i;
5101*4882a593Smuzhiyun /* Run the unmap loop in the same order as the map loop, and perform
5102*4882a593Smuzhiyun * CPU cache synchronization again to re-write the content of dirty CPU caches
5103*4882a593Smuzhiyun * to memory. This is a precautionary measure in case a GPU job has taken
5104*4882a593Smuzhiyun * advantage of a partially GPU-mapped range to write and corrupt the
5105*4882a593Smuzhiyun * content of memory, either inside or outside the imported region.
5106*4882a593Smuzhiyun *
5107*4882a593Smuzhiyun * Notice that this error recovery path doesn't try to be optimal and just
5108*4882a593Smuzhiyun * flushes the entire page range.
5109*4882a593Smuzhiyun */
5110*4882a593Smuzhiyun for (i = 0; i < dma_mapped_pages; i++) {
5111*4882a593Smuzhiyun dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
5112*4882a593Smuzhiyun
5113*4882a593Smuzhiyun dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
5114*4882a593Smuzhiyun #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5115*4882a593Smuzhiyun dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
5116*4882a593Smuzhiyun #else
5117*4882a593Smuzhiyun dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
5118*4882a593Smuzhiyun DMA_ATTR_SKIP_CPU_SYNC);
5119*4882a593Smuzhiyun #endif
5120*4882a593Smuzhiyun }
5121*4882a593Smuzhiyun
5122*4882a593Smuzhiyun /* The user buffer could have been pinned before entering this
5123*4882a593Smuzhiyun * function, and hence there could potentially be CPU
5124*4882a593Smuzhiyun * mappings of it
5125*4882a593Smuzhiyun */
5126*4882a593Smuzhiyun kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
5127*4882a593Smuzhiyun
5128*4882a593Smuzhiyun for (i = 0; i < pinned_pages; i++) {
5129*4882a593Smuzhiyun kbase_unpin_user_buf_page(pages[i]);
5130*4882a593Smuzhiyun pages[i] = NULL;
5131*4882a593Smuzhiyun }
5132*4882a593Smuzhiyun
5133*4882a593Smuzhiyun return err;
5134*4882a593Smuzhiyun }
5135*4882a593Smuzhiyun
5136*4882a593Smuzhiyun /* This function would also perform the work of unpinning pages on Job Manager
5137*4882a593Smuzhiyun * GPUs, which implies that a call to kbase_jd_user_buf_pin_pages() will NOT
5138*4882a593Smuzhiyun * have a corresponding call to kbase_jd_user_buf_unpin_pages().
5139*4882a593Smuzhiyun */
5140*4882a593Smuzhiyun static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem_phy_alloc *alloc,
5141*4882a593Smuzhiyun struct kbase_va_region *reg, bool writeable)
5142*4882a593Smuzhiyun {
5143*4882a593Smuzhiyun long i;
5144*4882a593Smuzhiyun struct page **pages;
5145*4882a593Smuzhiyun unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
5146*4882a593Smuzhiyun unsigned long remaining_size = alloc->imported.user_buf.size;
5147*4882a593Smuzhiyun
5148*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5149*4882a593Smuzhiyun
5150*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(alloc->type == KBASE_MEM_TYPE_IMPORTED_USER_BUF);
5151*4882a593Smuzhiyun pages = alloc->imported.user_buf.pages;
5152*4882a593Smuzhiyun
5153*4882a593Smuzhiyun #if !MALI_USE_CSF
5154*4882a593Smuzhiyun kbase_mem_shrink_cpu_mapping(kctx, reg, 0, alloc->nents);
5155*4882a593Smuzhiyun #else
5156*4882a593Smuzhiyun CSTD_UNUSED(reg);
5157*4882a593Smuzhiyun #endif
5158*4882a593Smuzhiyun
5159*4882a593Smuzhiyun for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
5160*4882a593Smuzhiyun unsigned long imported_size = MIN(remaining_size, PAGE_SIZE - offset_within_page);
5161*4882a593Smuzhiyun /* Notice: this is a temporary variable that is used for DMA sync
5162*4882a593Smuzhiyun * operations, and that could be incremented by an offset if the
5163*4882a593Smuzhiyun * current page contains both imported and non-imported memory
5164*4882a593Smuzhiyun * sub-regions.
5165*4882a593Smuzhiyun *
5166*4882a593Smuzhiyun * It is valid to add an offset to this value, because the offset
5167*4882a593Smuzhiyun * is always kept within the physically contiguous dma-mapped range
5168*4882a593Smuzhiyun * and there's no need to translate to physical address to offset it.
5169*4882a593Smuzhiyun *
5170*4882a593Smuzhiyun * This variable is not going to be used for the actual DMA unmap
5171*4882a593Smuzhiyun * operation, that shall always use the original DMA address of the
5172*4882a593Smuzhiyun * whole memory page.
5173*4882a593Smuzhiyun */
5174*4882a593Smuzhiyun dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
5175*4882a593Smuzhiyun
5176*4882a593Smuzhiyun /* Manual CPU cache synchronization.
5177*4882a593Smuzhiyun *
5178*4882a593Smuzhiyun * When the GPU returns ownership of the buffer to the CPU, the driver
5179*4882a593Smuzhiyun * needs to treat imported and non-imported memory differently.
5180*4882a593Smuzhiyun *
5181*4882a593Smuzhiyun * The first case to consider is non-imported sub-regions at the
5182*4882a593Smuzhiyun * beginning of the first page and at the end of last page. For these
5183*4882a593Smuzhiyun * sub-regions: CPU cache shall be committed with a clean+invalidate,
5184*4882a593Smuzhiyun * in order to keep the last CPU write.
5185*4882a593Smuzhiyun *
5186*4882a593Smuzhiyun * Imported region prefers the opposite treatment: this memory has been
5187*4882a593Smuzhiyun * legitimately mapped and used by the GPU, hence GPU writes shall be
5188*4882a593Smuzhiyun * committed to memory, while CPU cache shall be invalidated to make
5189*4882a593Smuzhiyun * sure that CPU reads the correct memory content.
5190*4882a593Smuzhiyun *
5191*4882a593Smuzhiyun * The following diagram shows the expected value of the variables
5192*4882a593Smuzhiyun * used in this loop in the corner case of an imported region enclosed
5193*4882a593Smuzhiyun * by a single memory page:
5194*4882a593Smuzhiyun *
5195*4882a593Smuzhiyun * page boundary ->|---------- | <- dma_addr (initial value)
5196*4882a593Smuzhiyun * | |
5197*4882a593Smuzhiyun * | - - - - - | <- offset_within_page
5198*4882a593Smuzhiyun * |XXXXXXXXXXX|\
5199*4882a593Smuzhiyun * |XXXXXXXXXXX| \
5200*4882a593Smuzhiyun * |XXXXXXXXXXX| }- imported_size
5201*4882a593Smuzhiyun * |XXXXXXXXXXX| /
5202*4882a593Smuzhiyun * |XXXXXXXXXXX|/
5203*4882a593Smuzhiyun * | - - - - - | <- offset_within_page + imported_size
5204*4882a593Smuzhiyun * | |\
5205*4882a593Smuzhiyun * | | }- PAGE_SIZE - imported_size - offset_within_page
5206*4882a593Smuzhiyun * | |/
5207*4882a593Smuzhiyun * page boundary ->|-----------|
5208*4882a593Smuzhiyun *
5209*4882a593Smuzhiyun * If the imported region is enclosed by more than one page, then
5210*4882a593Smuzhiyun * offset_within_page = 0 for any page after the first.
5211*4882a593Smuzhiyun */
5212*4882a593Smuzhiyun
5213*4882a593Smuzhiyun /* Only for first page: handle non-imported range at the beginning. */
5214*4882a593Smuzhiyun if (offset_within_page > 0) {
5215*4882a593Smuzhiyun dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
5216*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
5217*4882a593Smuzhiyun dma_addr += offset_within_page;
5218*4882a593Smuzhiyun }
5219*4882a593Smuzhiyun
5220*4882a593Smuzhiyun /* For every page: handle imported range. */
5221*4882a593Smuzhiyun if (imported_size > 0)
5222*4882a593Smuzhiyun dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
5223*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
5224*4882a593Smuzhiyun
5225*4882a593Smuzhiyun /* Only for last page (that may coincide with first page):
5226*4882a593Smuzhiyun * handle non-imported range at the end.
5227*4882a593Smuzhiyun */
5228*4882a593Smuzhiyun if ((imported_size + offset_within_page) < PAGE_SIZE) {
5229*4882a593Smuzhiyun dma_addr += imported_size;
5230*4882a593Smuzhiyun dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
5231*4882a593Smuzhiyun PAGE_SIZE - imported_size - offset_within_page,
5232*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
5233*4882a593Smuzhiyun }
5234*4882a593Smuzhiyun
5235*4882a593Smuzhiyun /* Notice: use the original DMA address to unmap the whole memory page. */
5236*4882a593Smuzhiyun #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
5237*4882a593Smuzhiyun dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
5238*4882a593Smuzhiyun DMA_BIDIRECTIONAL);
5239*4882a593Smuzhiyun #else
5240*4882a593Smuzhiyun dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
5241*4882a593Smuzhiyun PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
5242*4882a593Smuzhiyun #endif
5243*4882a593Smuzhiyun if (writeable)
5244*4882a593Smuzhiyun set_page_dirty_lock(pages[i]);
5245*4882a593Smuzhiyun #if !MALI_USE_CSF
5246*4882a593Smuzhiyun kbase_unpin_user_buf_page(pages[i]);
5247*4882a593Smuzhiyun pages[i] = NULL;
5248*4882a593Smuzhiyun #endif
5249*4882a593Smuzhiyun
5250*4882a593Smuzhiyun remaining_size -= imported_size;
5251*4882a593Smuzhiyun offset_within_page = 0;
5252*4882a593Smuzhiyun }
5253*4882a593Smuzhiyun #if !MALI_USE_CSF
5254*4882a593Smuzhiyun alloc->nents = 0;
5255*4882a593Smuzhiyun #endif
5256*4882a593Smuzhiyun }
5257*4882a593Smuzhiyun
5258*4882a593Smuzhiyun int kbase_mem_copy_to_pinned_user_pages(struct page **dest_pages,
5259*4882a593Smuzhiyun void *src_page, size_t *to_copy, unsigned int nr_pages,
5260*4882a593Smuzhiyun unsigned int *target_page_nr, size_t offset)
5261*4882a593Smuzhiyun {
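/* Illustrative example (assumed values): with PAGE_SIZE = 0x1000,
 * offset = 0xF00 and *to_copy = 0x300, the first memcpy() places 0x100
 * bytes at the end of the current destination page and the second
 * memcpy() places the remaining 0x200 bytes at the start of the next
 * page, leaving *to_copy = 0.
 */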
5262*4882a593Smuzhiyun void *target_page = kmap(dest_pages[*target_page_nr]);
5263*4882a593Smuzhiyun size_t chunk = PAGE_SIZE-offset;
5264*4882a593Smuzhiyun
5265*4882a593Smuzhiyun if (!target_page) {
5266*4882a593Smuzhiyun pr_err("%s: kmap failure", __func__);
5267*4882a593Smuzhiyun return -ENOMEM;
5268*4882a593Smuzhiyun }
5269*4882a593Smuzhiyun
5270*4882a593Smuzhiyun chunk = min(chunk, *to_copy);
5271*4882a593Smuzhiyun
5272*4882a593Smuzhiyun memcpy(target_page + offset, src_page, chunk);
5273*4882a593Smuzhiyun *to_copy -= chunk;
5274*4882a593Smuzhiyun
5275*4882a593Smuzhiyun kunmap(dest_pages[*target_page_nr]);
5276*4882a593Smuzhiyun
5277*4882a593Smuzhiyun *target_page_nr += 1;
5278*4882a593Smuzhiyun if (*target_page_nr >= nr_pages || *to_copy == 0)
5279*4882a593Smuzhiyun return 0;
5280*4882a593Smuzhiyun
5281*4882a593Smuzhiyun target_page = kmap(dest_pages[*target_page_nr]);
5282*4882a593Smuzhiyun if (!target_page) {
5283*4882a593Smuzhiyun pr_err("%s: kmap failure", __func__);
5284*4882a593Smuzhiyun return -ENOMEM;
5285*4882a593Smuzhiyun }
5286*4882a593Smuzhiyun
5287*4882a593Smuzhiyun KBASE_DEBUG_ASSERT(target_page);
5288*4882a593Smuzhiyun
5289*4882a593Smuzhiyun chunk = min(offset, *to_copy);
5290*4882a593Smuzhiyun memcpy(target_page, src_page + PAGE_SIZE-offset, chunk);
5291*4882a593Smuzhiyun *to_copy -= chunk;
5292*4882a593Smuzhiyun
5293*4882a593Smuzhiyun kunmap(dest_pages[*target_page_nr]);
5294*4882a593Smuzhiyun
5295*4882a593Smuzhiyun return 0;
5296*4882a593Smuzhiyun }
5297*4882a593Smuzhiyun
5298*4882a593Smuzhiyun int kbase_map_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg,
5299*4882a593Smuzhiyun struct mm_struct *locked_mm)
5300*4882a593Smuzhiyun {
5301*4882a593Smuzhiyun int err = 0;
5302*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
5303*4882a593Smuzhiyun
5304*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5305*4882a593Smuzhiyun
5306*4882a593Smuzhiyun /* decide what needs to happen for this resource */
5307*4882a593Smuzhiyun switch (reg->gpu_alloc->type) {
5308*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
5309*4882a593Smuzhiyun if ((reg->gpu_alloc->imported.user_buf.mm != locked_mm) &&
5310*4882a593Smuzhiyun (!reg->gpu_alloc->nents))
5311*4882a593Smuzhiyun return -EINVAL;
5312*4882a593Smuzhiyun
5313*4882a593Smuzhiyun reg->gpu_alloc->imported.user_buf.current_mapping_usage_count++;
5314*4882a593Smuzhiyun if (reg->gpu_alloc->imported.user_buf
5315*4882a593Smuzhiyun .current_mapping_usage_count == 1) {
5316*4882a593Smuzhiyun err = kbase_jd_user_buf_map(kctx, reg);
5317*4882a593Smuzhiyun if (err) {
5318*4882a593Smuzhiyun reg->gpu_alloc->imported.user_buf.current_mapping_usage_count--;
5319*4882a593Smuzhiyun return err;
5320*4882a593Smuzhiyun }
5321*4882a593Smuzhiyun }
5322*4882a593Smuzhiyun }
5323*4882a593Smuzhiyun break;
5324*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_UMM: {
5325*4882a593Smuzhiyun err = kbase_mem_umm_map(kctx, reg);
5326*4882a593Smuzhiyun if (err)
5327*4882a593Smuzhiyun return err;
5328*4882a593Smuzhiyun break;
5329*4882a593Smuzhiyun }
5330*4882a593Smuzhiyun default:
5331*4882a593Smuzhiyun dev_dbg(kctx->kbdev->dev,
5332*4882a593Smuzhiyun "Invalid external resource GPU allocation type (%x) on mapping",
5333*4882a593Smuzhiyun alloc->type);
5334*4882a593Smuzhiyun return -EINVAL;
5335*4882a593Smuzhiyun }
5336*4882a593Smuzhiyun
5337*4882a593Smuzhiyun kbase_va_region_alloc_get(kctx, reg);
5338*4882a593Smuzhiyun kbase_mem_phy_alloc_get(alloc);
5339*4882a593Smuzhiyun return err;
5340*4882a593Smuzhiyun }
5341*4882a593Smuzhiyun
5342*4882a593Smuzhiyun void kbase_unmap_external_resource(struct kbase_context *kctx, struct kbase_va_region *reg)
5343*4882a593Smuzhiyun {
5344*4882a593Smuzhiyun /* gpu_alloc was used in kbase_map_external_resource(), so we need to use it for the
5345*4882a593Smuzhiyun * unmapping operation.
5346*4882a593Smuzhiyun */
5347*4882a593Smuzhiyun struct kbase_mem_phy_alloc *alloc = reg->gpu_alloc;
5348*4882a593Smuzhiyun
5349*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5350*4882a593Smuzhiyun
5351*4882a593Smuzhiyun switch (alloc->type) {
5352*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_UMM: {
5353*4882a593Smuzhiyun kbase_mem_umm_unmap(kctx, reg, alloc);
5354*4882a593Smuzhiyun }
5355*4882a593Smuzhiyun break;
5356*4882a593Smuzhiyun case KBASE_MEM_TYPE_IMPORTED_USER_BUF: {
5357*4882a593Smuzhiyun alloc->imported.user_buf.current_mapping_usage_count--;
5358*4882a593Smuzhiyun
5359*4882a593Smuzhiyun if (alloc->imported.user_buf.current_mapping_usage_count == 0) {
5360*4882a593Smuzhiyun bool writeable = true;
5361*4882a593Smuzhiyun
5362*4882a593Smuzhiyun if (!kbase_is_region_invalid_or_free(reg)) {
5363*4882a593Smuzhiyun kbase_mmu_teardown_pages(kctx->kbdev, &kctx->mmu, reg->start_pfn,
5364*4882a593Smuzhiyun alloc->pages,
5365*4882a593Smuzhiyun kbase_reg_current_backed_size(reg),
5366*4882a593Smuzhiyun kbase_reg_current_backed_size(reg),
5367*4882a593Smuzhiyun kctx->as_nr, true);
5368*4882a593Smuzhiyun }
5369*4882a593Smuzhiyun
5370*4882a593Smuzhiyun if ((reg->flags & (KBASE_REG_CPU_WR | KBASE_REG_GPU_WR)) == 0)
5371*4882a593Smuzhiyun writeable = false;
5372*4882a593Smuzhiyun
5373*4882a593Smuzhiyun kbase_jd_user_buf_unmap(kctx, alloc, reg, writeable);
5374*4882a593Smuzhiyun }
5375*4882a593Smuzhiyun }
5376*4882a593Smuzhiyun break;
5377*4882a593Smuzhiyun default:
5378*4882a593Smuzhiyun WARN(1, "Invalid external resource GPU allocation type (%x) on unmapping",
5379*4882a593Smuzhiyun alloc->type);
5380*4882a593Smuzhiyun return;
5381*4882a593Smuzhiyun }
5382*4882a593Smuzhiyun kbase_mem_phy_alloc_put(alloc);
5383*4882a593Smuzhiyun kbase_va_region_alloc_put(kctx, reg);
5384*4882a593Smuzhiyun }
5385*4882a593Smuzhiyun
5386*4882a593Smuzhiyun static inline u64 kbasep_get_va_gpu_addr(struct kbase_va_region *reg)
5387*4882a593Smuzhiyun {
5388*4882a593Smuzhiyun return reg->start_pfn << PAGE_SHIFT;
5389*4882a593Smuzhiyun }
5390*4882a593Smuzhiyun
5391*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *kbase_sticky_resource_acquire(
5392*4882a593Smuzhiyun struct kbase_context *kctx, u64 gpu_addr)
5393*4882a593Smuzhiyun {
5394*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *meta = NULL;
5395*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *walker;
5396*4882a593Smuzhiyun
5397*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5398*4882a593Smuzhiyun
5399*4882a593Smuzhiyun /*
5400*4882a593Smuzhiyun * Walk the per context external resource metadata list for the
5401*4882a593Smuzhiyun * metadata which matches the region which is being acquired.
5402*4882a593Smuzhiyun */
5403*4882a593Smuzhiyun list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node) {
5404*4882a593Smuzhiyun if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr) {
5405*4882a593Smuzhiyun meta = walker;
5406*4882a593Smuzhiyun meta->ref++;
5407*4882a593Smuzhiyun break;
5408*4882a593Smuzhiyun }
5409*4882a593Smuzhiyun }
5410*4882a593Smuzhiyun
5411*4882a593Smuzhiyun /* No metadata exists so create one. */
5412*4882a593Smuzhiyun if (!meta) {
5413*4882a593Smuzhiyun struct kbase_va_region *reg;
5414*4882a593Smuzhiyun
5415*4882a593Smuzhiyun /* Find the region */
5416*4882a593Smuzhiyun reg = kbase_region_tracker_find_region_enclosing_address(kctx, gpu_addr);
5417*4882a593Smuzhiyun if (kbase_is_region_invalid_or_free(reg))
5418*4882a593Smuzhiyun goto failed;
5419*4882a593Smuzhiyun
5420*4882a593Smuzhiyun /* Allocate the metadata object */
5421*4882a593Smuzhiyun meta = kzalloc(sizeof(*meta), GFP_KERNEL);
5422*4882a593Smuzhiyun if (!meta)
5423*4882a593Smuzhiyun goto failed;
5424*4882a593Smuzhiyun /*
5425*4882a593Smuzhiyun * Fill in the metadata object and acquire a reference
5426*4882a593Smuzhiyun * for the physical resource.
5427*4882a593Smuzhiyun */
5428*4882a593Smuzhiyun meta->reg = reg;
5429*4882a593Smuzhiyun
5430*4882a593Smuzhiyun /* Map the external resource to the GPU allocation of the region
5431*4882a593Smuzhiyun * and acquire the reference to the VA region
5432*4882a593Smuzhiyun */
5433*4882a593Smuzhiyun if (kbase_map_external_resource(kctx, meta->reg, NULL))
5434*4882a593Smuzhiyun goto fail_map;
5435*4882a593Smuzhiyun meta->ref = 1;
5436*4882a593Smuzhiyun
5437*4882a593Smuzhiyun list_add(&meta->ext_res_node, &kctx->ext_res_meta_head);
5438*4882a593Smuzhiyun }
5439*4882a593Smuzhiyun
5440*4882a593Smuzhiyun return meta;
5441*4882a593Smuzhiyun
5442*4882a593Smuzhiyun fail_map:
5443*4882a593Smuzhiyun kfree(meta);
5444*4882a593Smuzhiyun failed:
5445*4882a593Smuzhiyun return NULL;
5446*4882a593Smuzhiyun }
5447*4882a593Smuzhiyun
5448*4882a593Smuzhiyun static struct kbase_ctx_ext_res_meta *
5449*4882a593Smuzhiyun find_sticky_resource_meta(struct kbase_context *kctx, u64 gpu_addr)
5450*4882a593Smuzhiyun {
5451*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *walker;
5452*4882a593Smuzhiyun
5453*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5454*4882a593Smuzhiyun
5455*4882a593Smuzhiyun /*
5456*4882a593Smuzhiyun * Walk the per context external resource metadata list for the
5457*4882a593Smuzhiyun * metadata which matches the region which is being released.
5458*4882a593Smuzhiyun */
5459*4882a593Smuzhiyun list_for_each_entry(walker, &kctx->ext_res_meta_head, ext_res_node)
5460*4882a593Smuzhiyun if (kbasep_get_va_gpu_addr(walker->reg) == gpu_addr)
5461*4882a593Smuzhiyun return walker;
5462*4882a593Smuzhiyun
5463*4882a593Smuzhiyun return NULL;
5464*4882a593Smuzhiyun }
5465*4882a593Smuzhiyun
5466*4882a593Smuzhiyun static void release_sticky_resource_meta(struct kbase_context *kctx,
5467*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *meta)
5468*4882a593Smuzhiyun {
5469*4882a593Smuzhiyun kbase_unmap_external_resource(kctx, meta->reg);
5470*4882a593Smuzhiyun list_del(&meta->ext_res_node);
5471*4882a593Smuzhiyun kfree(meta);
5472*4882a593Smuzhiyun }
5473*4882a593Smuzhiyun
5474*4882a593Smuzhiyun bool kbase_sticky_resource_release(struct kbase_context *kctx,
5475*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
5476*4882a593Smuzhiyun {
5477*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5478*4882a593Smuzhiyun
5479*4882a593Smuzhiyun /* Search for the metadata if one isn't provided. */
5480*4882a593Smuzhiyun if (!meta)
5481*4882a593Smuzhiyun meta = find_sticky_resource_meta(kctx, gpu_addr);
5482*4882a593Smuzhiyun
5483*4882a593Smuzhiyun /* No metadata so just return. */
5484*4882a593Smuzhiyun if (!meta)
5485*4882a593Smuzhiyun return false;
5486*4882a593Smuzhiyun
5487*4882a593Smuzhiyun if (--meta->ref != 0)
5488*4882a593Smuzhiyun return true;
5489*4882a593Smuzhiyun
5490*4882a593Smuzhiyun release_sticky_resource_meta(kctx, meta);
5491*4882a593Smuzhiyun
5492*4882a593Smuzhiyun return true;
5493*4882a593Smuzhiyun }
5494*4882a593Smuzhiyun
5495*4882a593Smuzhiyun bool kbase_sticky_resource_release_force(struct kbase_context *kctx,
5496*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *meta, u64 gpu_addr)
5497*4882a593Smuzhiyun {
5498*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5499*4882a593Smuzhiyun
5500*4882a593Smuzhiyun /* Search for the metadata if one isn't provided. */
5501*4882a593Smuzhiyun if (!meta)
5502*4882a593Smuzhiyun meta = find_sticky_resource_meta(kctx, gpu_addr);
5503*4882a593Smuzhiyun
5504*4882a593Smuzhiyun /* No metadata so just return. */
5505*4882a593Smuzhiyun if (!meta)
5506*4882a593Smuzhiyun return false;
5507*4882a593Smuzhiyun
5508*4882a593Smuzhiyun release_sticky_resource_meta(kctx, meta);
5509*4882a593Smuzhiyun
5510*4882a593Smuzhiyun return true;
5511*4882a593Smuzhiyun }
5512*4882a593Smuzhiyun
5513*4882a593Smuzhiyun int kbase_sticky_resource_init(struct kbase_context *kctx)
5514*4882a593Smuzhiyun {
5515*4882a593Smuzhiyun INIT_LIST_HEAD(&kctx->ext_res_meta_head);
5516*4882a593Smuzhiyun
5517*4882a593Smuzhiyun return 0;
5518*4882a593Smuzhiyun }
5519*4882a593Smuzhiyun
5520*4882a593Smuzhiyun void kbase_sticky_resource_term(struct kbase_context *kctx)
5521*4882a593Smuzhiyun {
5522*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta *walker;
5523*4882a593Smuzhiyun
5524*4882a593Smuzhiyun lockdep_assert_held(&kctx->reg_lock);
5525*4882a593Smuzhiyun
5526*4882a593Smuzhiyun /*
5527*4882a593Smuzhiyun * Free any sticky resources which haven't been unmapped.
5528*4882a593Smuzhiyun *
5529*4882a593Smuzhiyun * Note:
5530*4882a593Smuzhiyun * We don't care about refcounts at this point as no future
5531*4882a593Smuzhiyun * references to the meta data will be made.
5532*4882a593Smuzhiyun * Region termination would find these if we didn't free them
5533*4882a593Smuzhiyun * here, but it's more efficient if we do the clean up here.
5534*4882a593Smuzhiyun */
5535*4882a593Smuzhiyun while (!list_empty(&kctx->ext_res_meta_head)) {
5536*4882a593Smuzhiyun walker = list_first_entry(&kctx->ext_res_meta_head,
5537*4882a593Smuzhiyun struct kbase_ctx_ext_res_meta, ext_res_node);
5538*4882a593Smuzhiyun
5539*4882a593Smuzhiyun kbase_sticky_resource_release_force(kctx, walker, 0);
5540*4882a593Smuzhiyun }
5541*4882a593Smuzhiyun }
5542