xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/mmu/mali_kbase_mmu.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
2 /*
3  *
4  * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /**
23  * DOC: Base kernel MMU management.
24  */
25 
26 #include <linux/kernel.h>
27 #include <linux/dma-mapping.h>
28 #include <linux/migrate.h>
29 #include <mali_kbase.h>
30 #include <gpu/mali_kbase_gpu_fault.h>
31 #include <gpu/mali_kbase_gpu_regmap.h>
32 #include <tl/mali_kbase_tracepoints.h>
33 #include <backend/gpu/mali_kbase_instr_defs.h>
34 #include <mali_kbase_ctx_sched.h>
35 #include <mali_kbase_debug.h>
36 #include <mali_kbase_defs.h>
37 #include <mali_kbase_hw.h>
38 #include <mmu/mali_kbase_mmu_hw.h>
39 #include <mali_kbase_mem.h>
40 #include <mali_kbase_reset_gpu.h>
41 #include <mmu/mali_kbase_mmu.h>
42 #include <mmu/mali_kbase_mmu_internal.h>
43 #include <mali_kbase_cs_experimental.h>
44 #include <device/mali_kbase_device.h>
45 #include <uapi/gpu/arm/bifrost/gpu/mali_kbase_gpu_id.h>
46 #if !MALI_USE_CSF
47 #include <mali_kbase_hwaccess_jm.h>
48 #endif
49 
50 #include <mali_kbase_trace_gpu_mem.h>
51 #include <backend/gpu/mali_kbase_pm_internal.h>
52 
53 /* Threshold used to decide whether to flush full caches or just a physical range */
54 #define KBASE_PA_RANGE_THRESHOLD_NR_PAGES 20
55 #define MGM_DEFAULT_PTE_GROUP (0)
56 
57 /* Macro to convert updated PDGs to flags indicating levels skip in flush */
58 #define pgd_level_to_skip_flush(dirty_pgds) (~(dirty_pgds) & 0xF)
59 
60 /* Small wrapper function to factor out GPU-dependent context releasing */
release_ctx(struct kbase_device * kbdev,struct kbase_context * kctx)61 static void release_ctx(struct kbase_device *kbdev,
62 		struct kbase_context *kctx)
63 {
64 #if MALI_USE_CSF
65 	CSTD_UNUSED(kbdev);
66 	kbase_ctx_sched_release_ctx_lock(kctx);
67 #else /* MALI_USE_CSF */
68 	kbasep_js_runpool_release_ctx(kbdev, kctx);
69 #endif /* MALI_USE_CSF */
70 }
71 
mmu_hw_operation_begin(struct kbase_device * kbdev)72 static void mmu_hw_operation_begin(struct kbase_device *kbdev)
73 {
74 #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
75 #if MALI_USE_CSF
76 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
77 		unsigned long flags;
78 
79 		lockdep_assert_held(&kbdev->mmu_hw_mutex);
80 
81 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
82 		WARN_ON_ONCE(kbdev->mmu_hw_operation_in_progress);
83 		kbdev->mmu_hw_operation_in_progress = true;
84 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
85 	}
86 #endif /* MALI_USE_CSF */
87 #endif /* !CONFIG_MALI_BIFROST_NO_MALI */
88 }
89 
mmu_hw_operation_end(struct kbase_device * kbdev)90 static void mmu_hw_operation_end(struct kbase_device *kbdev)
91 {
92 #if !IS_ENABLED(CONFIG_MALI_BIFROST_NO_MALI)
93 #if MALI_USE_CSF
94 	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_GPU2019_3878)) {
95 		unsigned long flags;
96 
97 		lockdep_assert_held(&kbdev->mmu_hw_mutex);
98 
99 		spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
100 		WARN_ON_ONCE(!kbdev->mmu_hw_operation_in_progress);
101 		kbdev->mmu_hw_operation_in_progress = false;
102 		/* Invoke the PM state machine, the L2 power off may have been
103 		 * skipped due to the MMU command.
104 		 */
105 		kbase_pm_update_state(kbdev);
106 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
107 	}
108 #endif /* MALI_USE_CSF */
109 #endif /* !CONFIG_MALI_BIFROST_NO_MALI */
110 }
111 
112 /**
113  * mmu_flush_cache_on_gpu_ctrl() - Check if cache flush needs to be done
114  * through GPU_CONTROL interface.
115  *
116  * @kbdev:         kbase device to check GPU model ID on.
117  *
118  * This function returns whether a cache flush for page table update should
119  * run through GPU_CONTROL interface or MMU_AS_CONTROL interface.
120  *
121  * Return: True if cache flush should be done on GPU command.
122  */
mmu_flush_cache_on_gpu_ctrl(struct kbase_device * kbdev)123 static bool mmu_flush_cache_on_gpu_ctrl(struct kbase_device *kbdev)
124 {
125 	uint32_t const arch_maj_cur = (kbdev->gpu_props.props.raw_props.gpu_id &
126 				       GPU_ID2_ARCH_MAJOR) >>
127 				      GPU_ID2_ARCH_MAJOR_SHIFT;
128 
129 	return arch_maj_cur > 11;
130 }
131 
132 /**
133  * mmu_flush_pa_range() - Flush physical address range
134  *
135  * @kbdev:    kbase device to issue the MMU operation on.
136  * @phys:     Starting address of the physical range to start the operation on.
137  * @nr_bytes: Number of bytes to work on.
138  * @op:       Type of cache flush operation to perform.
139  *
140  * Issue a cache flush physical range command.
141  */
142 #if MALI_USE_CSF
mmu_flush_pa_range(struct kbase_device * kbdev,phys_addr_t phys,size_t nr_bytes,enum kbase_mmu_op_type op)143 static void mmu_flush_pa_range(struct kbase_device *kbdev, phys_addr_t phys, size_t nr_bytes,
144 			       enum kbase_mmu_op_type op)
145 {
146 	u32 flush_op;
147 
148 	lockdep_assert_held(&kbdev->hwaccess_lock);
149 
150 	/* Translate operation to command */
151 	if (op == KBASE_MMU_OP_FLUSH_PT)
152 		flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2;
153 	else if (op == KBASE_MMU_OP_FLUSH_MEM)
154 		flush_op = GPU_COMMAND_FLUSH_PA_RANGE_CLN_INV_L2_LSC;
155 	else {
156 		dev_warn(kbdev->dev, "Invalid flush request (op = %d)", op);
157 		return;
158 	}
159 
160 	if (kbase_gpu_cache_flush_pa_range_and_busy_wait(kbdev, phys, nr_bytes, flush_op))
161 		dev_err(kbdev->dev, "Flush for physical address range did not complete");
162 }
163 #endif
164 
165 /**
166  * mmu_invalidate() - Perform an invalidate operation on MMU caches.
167  * @kbdev:      The Kbase device.
168  * @kctx:       The Kbase context.
169  * @as_nr:      GPU address space number for which invalidate is required.
170  * @op_param: Non-NULL pointer to struct containing information about the MMU
171  *            operation to perform.
172  *
173  * Perform an MMU invalidate operation on a particual address space
174  * by issuing a UNLOCK command.
175  */
mmu_invalidate(struct kbase_device * kbdev,struct kbase_context * kctx,int as_nr,const struct kbase_mmu_hw_op_param * op_param)176 static void mmu_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
177 			   const struct kbase_mmu_hw_op_param *op_param)
178 {
179 	unsigned long flags;
180 
181 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
182 
183 	if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
184 		as_nr = kctx ? kctx->as_nr : as_nr;
185 		if (kbase_mmu_hw_do_unlock(kbdev, &kbdev->as[as_nr], op_param))
186 			dev_err(kbdev->dev,
187 				"Invalidate after GPU page table update did not complete");
188 	}
189 
190 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
191 }
192 
193 /* Perform a flush/invalidate on a particular address space
194  */
mmu_flush_invalidate_as(struct kbase_device * kbdev,struct kbase_as * as,const struct kbase_mmu_hw_op_param * op_param)195 static void mmu_flush_invalidate_as(struct kbase_device *kbdev, struct kbase_as *as,
196 				    const struct kbase_mmu_hw_op_param *op_param)
197 {
198 	unsigned long flags;
199 
200 	/* AS transaction begin */
201 	mutex_lock(&kbdev->mmu_hw_mutex);
202 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
203 
204 	if (kbdev->pm.backend.gpu_powered && (kbase_mmu_hw_do_flush_locked(kbdev, as, op_param)))
205 		dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
206 
207 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
208 	mutex_unlock(&kbdev->mmu_hw_mutex);
209 	/* AS transaction end */
210 }
211 
212 /**
213  * mmu_flush_invalidate() - Perform a flush operation on GPU caches.
214  * @kbdev:      The Kbase device.
215  * @kctx:       The Kbase context.
216  * @as_nr:      GPU address space number for which flush + invalidate is required.
217  * @op_param: Non-NULL pointer to struct containing information about the MMU
218  *            operation to perform.
219  *
220  * This function performs the cache flush operation described by @op_param.
221  * The function retains a reference to the given @kctx and releases it
222  * after performing the flush operation.
223  *
224  * If operation is set to KBASE_MMU_OP_FLUSH_PT then this function will issue
225  * a cache flush + invalidate to the L2 caches and invalidate the TLBs.
226  *
227  * If operation is set to KBASE_MMU_OP_FLUSH_MEM then this function will issue
228  * a cache flush + invalidate to the L2 and GPU Load/Store caches as well as
229  * invalidating the TLBs.
230  */
mmu_flush_invalidate(struct kbase_device * kbdev,struct kbase_context * kctx,int as_nr,const struct kbase_mmu_hw_op_param * op_param)231 static void mmu_flush_invalidate(struct kbase_device *kbdev, struct kbase_context *kctx, int as_nr,
232 				 const struct kbase_mmu_hw_op_param *op_param)
233 {
234 	bool ctx_is_in_runpool;
235 
236 	/* Early out if there is nothing to do */
237 	if (op_param->nr == 0)
238 		return;
239 
240 	/* If no context is provided then MMU operation is performed on address
241 	 * space which does not belong to user space context. Otherwise, retain
242 	 * refcount to context provided and release after flush operation.
243 	 */
244 	if (!kctx) {
245 		mmu_flush_invalidate_as(kbdev, &kbdev->as[as_nr], op_param);
246 	} else {
247 #if !MALI_USE_CSF
248 		mutex_lock(&kbdev->js_data.queue_mutex);
249 		ctx_is_in_runpool = kbase_ctx_sched_inc_refcount(kctx);
250 		mutex_unlock(&kbdev->js_data.queue_mutex);
251 #else
252 		ctx_is_in_runpool = kbase_ctx_sched_inc_refcount_if_as_valid(kctx);
253 #endif /* !MALI_USE_CSF */
254 
255 		if (ctx_is_in_runpool) {
256 			KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
257 
258 			mmu_flush_invalidate_as(kbdev, &kbdev->as[kctx->as_nr], op_param);
259 
260 			release_ctx(kbdev, kctx);
261 		}
262 	}
263 }
264 
265 /**
266  * mmu_flush_invalidate_on_gpu_ctrl() - Perform a flush operation on GPU caches via
267  *                                    the GPU_CONTROL interface
268  * @kbdev:      The Kbase device.
269  * @kctx:       The Kbase context.
270  * @as_nr:      GPU address space number for which flush + invalidate is required.
271  * @op_param: Non-NULL pointer to struct containing information about the MMU
272  *            operation to perform.
273  *
274  * Perform a flush/invalidate on a particular address space via the GPU_CONTROL
275  * interface.
276  */
mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device * kbdev,struct kbase_context * kctx,int as_nr,const struct kbase_mmu_hw_op_param * op_param)277 static void mmu_flush_invalidate_on_gpu_ctrl(struct kbase_device *kbdev, struct kbase_context *kctx,
278 					int as_nr, const struct kbase_mmu_hw_op_param *op_param)
279 {
280 	unsigned long flags;
281 
282 	/* AS transaction begin */
283 	mutex_lock(&kbdev->mmu_hw_mutex);
284 	spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
285 
286 	if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0)) {
287 		as_nr = kctx ? kctx->as_nr : as_nr;
288 		if (kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, &kbdev->as[as_nr], op_param))
289 			dev_err(kbdev->dev, "Flush for GPU page table update did not complete");
290 	}
291 
292 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
293 	mutex_unlock(&kbdev->mmu_hw_mutex);
294 }
295 
kbase_mmu_sync_pgd_gpu(struct kbase_device * kbdev,struct kbase_context * kctx,phys_addr_t phys,size_t size,enum kbase_mmu_op_type flush_op)296 static void kbase_mmu_sync_pgd_gpu(struct kbase_device *kbdev, struct kbase_context *kctx,
297 				   phys_addr_t phys, size_t size,
298 				   enum kbase_mmu_op_type flush_op)
299 {
300 	kbase_mmu_flush_pa_range(kbdev, kctx, phys, size, flush_op);
301 }
302 
kbase_mmu_sync_pgd_cpu(struct kbase_device * kbdev,dma_addr_t handle,size_t size)303 static void kbase_mmu_sync_pgd_cpu(struct kbase_device *kbdev, dma_addr_t handle, size_t size)
304 {
305 	/* In non-coherent system, ensure the GPU can read
306 	 * the pages from memory
307 	 */
308 	if (kbdev->system_coherency == COHERENCY_NONE)
309 		dma_sync_single_for_device(kbdev->dev, handle, size,
310 				DMA_TO_DEVICE);
311 }
312 
313 /**
314  * kbase_mmu_sync_pgd() - sync page directory to memory when needed.
315  * @kbdev:    Device pointer.
316  * @kctx:     Context pointer.
317  * @phys:     Starting physical address of the destination region.
318  * @handle:   Address of DMA region.
319  * @size:     Size of the region to sync.
320  * @flush_op: MMU cache flush operation to perform on the physical address
321  *            range, if GPU control is available.
322  *
323  * This function is called whenever the association between a virtual address
324  * range and a physical address range changes, because a mapping is created or
325  * destroyed.
326  * One of the effects of this operation is performing an MMU cache flush
327  * operation only on the physical address range affected by this function, if
328  * GPU control is available.
329  *
330  * This should be called after each page directory update.
331  */
kbase_mmu_sync_pgd(struct kbase_device * kbdev,struct kbase_context * kctx,phys_addr_t phys,dma_addr_t handle,size_t size,enum kbase_mmu_op_type flush_op)332 static void kbase_mmu_sync_pgd(struct kbase_device *kbdev, struct kbase_context *kctx,
333 			       phys_addr_t phys, dma_addr_t handle, size_t size,
334 			       enum kbase_mmu_op_type flush_op)
335 {
336 
337 	kbase_mmu_sync_pgd_cpu(kbdev, handle, size);
338 	kbase_mmu_sync_pgd_gpu(kbdev, kctx, phys, size, flush_op);
339 }
340 
341 /*
342  * Definitions:
343  * - PGD: Page Directory.
344  * - PTE: Page Table Entry. A 64bit value pointing to the next
345  *        level of translation
346  * - ATE: Address Translation Entry. A 64bit value pointing to
347  *        a 4kB physical page.
348  */
349 
350 static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
351 					   u64 vpfn, struct tagged_addr *phys, size_t nr,
352 					   unsigned long flags, int group_id, u64 *dirty_pgds);
353 
354 /**
355  * kbase_mmu_update_and_free_parent_pgds() - Update number of valid entries and
356  *                                           free memory of the page directories
357  *
358  * @kbdev:    Device pointer.
359  * @mmut:     GPU MMU page table.
360  * @pgds:     Physical addresses of page directories to be freed.
361  * @vpfn:     The virtual page frame number.
362  * @level:    The level of MMU page table.
363  * @flush_op: The type of MMU flush operation to perform.
364  * @dirty_pgds: Flags to track every level where a PGD has been updated.
365  */
366 static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
367 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
368 						  u64 vpfn, int level,
369 						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds);
370 
kbase_mmu_account_freed_pgd(struct kbase_device * kbdev,struct kbase_mmu_table * mmut)371 static void kbase_mmu_account_freed_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
372 {
373 	atomic_sub(1, &kbdev->memdev.used_pages);
374 
375 	/* If MMU tables belong to a context then pages will have been accounted
376 	 * against it, so we must decrement the usage counts here.
377 	 */
378 	if (mmut->kctx) {
379 		kbase_process_page_usage_dec(mmut->kctx, 1);
380 		atomic_sub(1, &mmut->kctx->used_pages);
381 	}
382 
383 	kbase_trace_gpu_mem_usage_dec(kbdev, mmut->kctx, 1);
384 }
385 
kbase_mmu_handle_isolated_pgd_page(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,struct page * p)386 static bool kbase_mmu_handle_isolated_pgd_page(struct kbase_device *kbdev,
387 					       struct kbase_mmu_table *mmut,
388 					       struct page *p)
389 {
390 	struct kbase_page_metadata *page_md = kbase_page_private(p);
391 	bool page_is_isolated = false;
392 
393 	lockdep_assert_held(&mmut->mmu_lock);
394 
395 	if (!kbase_page_migration_enabled)
396 		return false;
397 
398 	spin_lock(&page_md->migrate_lock);
399 	if (PAGE_STATUS_GET(page_md->status) == PT_MAPPED) {
400 		WARN_ON_ONCE(!mmut->kctx);
401 		if (IS_PAGE_ISOLATED(page_md->status)) {
402 			page_md->status = PAGE_STATUS_SET(page_md->status,
403 							  FREE_PT_ISOLATED_IN_PROGRESS);
404 			page_md->data.free_pt_isolated.kbdev = kbdev;
405 			page_is_isolated = true;
406 		} else {
407 			page_md->status =
408 				PAGE_STATUS_SET(page_md->status, FREE_IN_PROGRESS);
409 		}
410 	} else {
411 		WARN_ON_ONCE(mmut->kctx);
412 		WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != NOT_MOVABLE);
413 	}
414 	spin_unlock(&page_md->migrate_lock);
415 
416 	if (unlikely(page_is_isolated)) {
417 		/* Do the CPU cache flush and accounting here for the isolated
418 		 * PGD page, which is done inside kbase_mmu_free_pgd() for the
419 		 * PGD page that did not get isolated.
420 		 */
421 		dma_sync_single_for_device(kbdev->dev, kbase_dma_addr(p), PAGE_SIZE,
422 					   DMA_BIDIRECTIONAL);
423 		kbase_mmu_account_freed_pgd(kbdev, mmut);
424 	}
425 
426 	return page_is_isolated;
427 }
428 
429 /**
430  * kbase_mmu_free_pgd() - Free memory of the page directory
431  *
432  * @kbdev:   Device pointer.
433  * @mmut:    GPU MMU page table.
434  * @pgd:     Physical address of page directory to be freed.
435  *
436  * This function is supposed to be called with mmu_lock held and after
437  * ensuring that GPU won't be able to access the page.
438  */
kbase_mmu_free_pgd(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,phys_addr_t pgd)439 static void kbase_mmu_free_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
440 			       phys_addr_t pgd)
441 {
442 	struct page *p;
443 	bool page_is_isolated = false;
444 
445 	lockdep_assert_held(&mmut->mmu_lock);
446 
447 	p = pfn_to_page(PFN_DOWN(pgd));
448 	page_is_isolated = kbase_mmu_handle_isolated_pgd_page(kbdev, mmut, p);
449 
450 	if (likely(!page_is_isolated)) {
451 		kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, true);
452 		kbase_mmu_account_freed_pgd(kbdev, mmut);
453 	}
454 }
455 
456 /**
457  * kbase_mmu_free_pgds_list() - Free the PGD pages present in the list
458  *
459  * @kbdev:          Device pointer.
460  * @mmut:           GPU MMU page table.
461  *
462  * This function will call kbase_mmu_free_pgd() on each page directory page
463  * present in the list of free PGDs inside @mmut.
464  *
465  * The function is supposed to be called after the GPU cache and MMU TLB has
466  * been invalidated post the teardown loop.
467  *
468  * The mmu_lock shall be held prior to calling the function.
469  */
kbase_mmu_free_pgds_list(struct kbase_device * kbdev,struct kbase_mmu_table * mmut)470 static void kbase_mmu_free_pgds_list(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
471 {
472 	size_t i;
473 
474 	lockdep_assert_held(&mmut->mmu_lock);
475 
476 	for (i = 0; i < mmut->scratch_mem.free_pgds.head_index; i++)
477 		kbase_mmu_free_pgd(kbdev, mmut, page_to_phys(mmut->scratch_mem.free_pgds.pgds[i]));
478 
479 	mmut->scratch_mem.free_pgds.head_index = 0;
480 }
481 
kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table * mmut,struct page * p)482 static void kbase_mmu_add_to_free_pgds_list(struct kbase_mmu_table *mmut, struct page *p)
483 {
484 	lockdep_assert_held(&mmut->mmu_lock);
485 
486 	if (WARN_ON_ONCE(mmut->scratch_mem.free_pgds.head_index > (MAX_FREE_PGDS - 1)))
487 		return;
488 
489 	mmut->scratch_mem.free_pgds.pgds[mmut->scratch_mem.free_pgds.head_index++] = p;
490 }
491 
kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table * mmut)492 static inline void kbase_mmu_reset_free_pgds_list(struct kbase_mmu_table *mmut)
493 {
494 	lockdep_assert_held(&mmut->mmu_lock);
495 
496 	mmut->scratch_mem.free_pgds.head_index = 0;
497 }
498 
499 /**
500  * reg_grow_calc_extra_pages() - Calculate the number of backed pages to add to
501  *                               a region on a GPU page fault
502  * @kbdev:         KBase device
503  * @reg:           The region that will be backed with more pages
504  * @fault_rel_pfn: PFN of the fault relative to the start of the region
505  *
506  * This calculates how much to increase the backing of a region by, based on
507  * where a GPU page fault occurred and the flags in the region.
508  *
509  * This can be more than the minimum number of pages that would reach
510  * @fault_rel_pfn, for example to reduce the overall rate of page fault
511  * interrupts on a region, or to ensure that the end address is aligned.
512  *
513  * Return: the number of backed pages to increase by
514  */
reg_grow_calc_extra_pages(struct kbase_device * kbdev,struct kbase_va_region * reg,size_t fault_rel_pfn)515 static size_t reg_grow_calc_extra_pages(struct kbase_device *kbdev,
516 		struct kbase_va_region *reg, size_t fault_rel_pfn)
517 {
518 	size_t multiple = reg->extension;
519 	size_t reg_current_size = kbase_reg_current_backed_size(reg);
520 	size_t minimum_extra = fault_rel_pfn - reg_current_size + 1;
521 	size_t remainder;
522 
523 	if (!multiple) {
524 		dev_warn(
525 			kbdev->dev,
526 			"VA Region 0x%llx extension was 0, allocator needs to set this properly for KBASE_REG_PF_GROW",
527 			((unsigned long long)reg->start_pfn) << PAGE_SHIFT);
528 		return minimum_extra;
529 	}
530 
531 	/* Calculate the remainder to subtract from minimum_extra to make it
532 	 * the desired (rounded down) multiple of the extension.
533 	 * Depending on reg's flags, the base used for calculating multiples is
534 	 * different
535 	 */
536 
537 	/* multiple is based from the current backed size, even if the
538 	 * current backed size/pfn for end of committed memory are not
539 	 * themselves aligned to multiple
540 	 */
541 	remainder = minimum_extra % multiple;
542 
543 #if !MALI_USE_CSF
544 	if (reg->flags & KBASE_REG_TILER_ALIGN_TOP) {
545 		/* multiple is based from the top of the initial commit, which
546 		 * has been allocated in such a way that (start_pfn +
547 		 * initial_commit) is already aligned to multiple. Hence the
548 		 * pfn for the end of committed memory will also be aligned to
549 		 * multiple
550 		 */
551 		size_t initial_commit = reg->initial_commit;
552 
553 		if (fault_rel_pfn < initial_commit) {
554 			/* this case is just to catch in case it's been
555 			 * recommitted by userspace to be smaller than the
556 			 * initial commit
557 			 */
558 			minimum_extra = initial_commit - reg_current_size;
559 			remainder = 0;
560 		} else {
561 			/* same as calculating
562 			 * (fault_rel_pfn - initial_commit + 1)
563 			 */
564 			size_t pages_after_initial = minimum_extra +
565 				reg_current_size - initial_commit;
566 
567 			remainder = pages_after_initial % multiple;
568 		}
569 	}
570 #endif /* !MALI_USE_CSF */
571 
572 	if (remainder == 0)
573 		return minimum_extra;
574 
575 	return minimum_extra + multiple - remainder;
576 }
577 
578 #ifdef CONFIG_MALI_CINSTR_GWT
kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device * kbdev,struct kbase_as * faulting_as,u64 start_pfn,size_t nr,u32 kctx_id,u64 dirty_pgds)579 static void kbase_gpu_mmu_handle_write_faulting_as(struct kbase_device *kbdev,
580 						   struct kbase_as *faulting_as,
581 						   u64 start_pfn, size_t nr,
582 						   u32 kctx_id, u64 dirty_pgds)
583 {
584 	/* Calls to this function are inherently synchronous, with respect to
585 	 * MMU operations.
586 	 */
587 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
588 	struct kbase_mmu_hw_op_param op_param;
589 	int ret = 0;
590 
591 	mutex_lock(&kbdev->mmu_hw_mutex);
592 
593 	kbase_mmu_hw_clear_fault(kbdev, faulting_as,
594 			KBASE_MMU_FAULT_TYPE_PAGE);
595 
596 	/* flush L2 and unlock the VA (resumes the MMU) */
597 	op_param.vpfn = start_pfn;
598 	op_param.nr = nr;
599 	op_param.op = KBASE_MMU_OP_FLUSH_PT;
600 	op_param.kctx_id = kctx_id;
601 	op_param.mmu_sync_info = mmu_sync_info;
602 	if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
603 		unsigned long irq_flags;
604 
605 		spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
606 		op_param.flush_skip_levels =
607 				pgd_level_to_skip_flush(dirty_pgds);
608 		ret = kbase_mmu_hw_do_flush_on_gpu_ctrl(kbdev, faulting_as, &op_param);
609 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
610 	} else {
611 		mmu_hw_operation_begin(kbdev);
612 		ret = kbase_mmu_hw_do_flush(kbdev, faulting_as, &op_param);
613 		mmu_hw_operation_end(kbdev);
614 	}
615 
616 	mutex_unlock(&kbdev->mmu_hw_mutex);
617 
618 	if (ret)
619 		dev_err(kbdev->dev,
620 			"Flush for GPU page fault due to write access did not complete");
621 
622 	kbase_mmu_hw_enable_fault(kbdev, faulting_as,
623 			KBASE_MMU_FAULT_TYPE_PAGE);
624 }
625 
set_gwt_element_page_addr_and_size(struct kbasep_gwt_list_element * element,u64 fault_page_addr,struct tagged_addr fault_phys)626 static void set_gwt_element_page_addr_and_size(
627 		struct kbasep_gwt_list_element *element,
628 		u64 fault_page_addr, struct tagged_addr fault_phys)
629 {
630 	u64 fault_pfn = fault_page_addr >> PAGE_SHIFT;
631 	unsigned int vindex = fault_pfn & (NUM_4K_PAGES_IN_2MB_PAGE - 1);
632 
633 	/* If the fault address lies within a 2MB page, then consider
634 	 * the whole 2MB page for dumping to avoid incomplete dumps.
635 	 */
636 	if (is_huge(fault_phys) && (vindex == index_in_large_page(fault_phys))) {
637 		element->page_addr = fault_page_addr & ~(SZ_2M - 1);
638 		element->num_pages = NUM_4K_PAGES_IN_2MB_PAGE;
639 	} else {
640 		element->page_addr = fault_page_addr;
641 		element->num_pages = 1;
642 	}
643 }
644 
kbase_gpu_mmu_handle_write_fault(struct kbase_context * kctx,struct kbase_as * faulting_as)645 static void kbase_gpu_mmu_handle_write_fault(struct kbase_context *kctx,
646 			struct kbase_as *faulting_as)
647 {
648 	struct kbasep_gwt_list_element *pos;
649 	struct kbase_va_region *region;
650 	struct kbase_device *kbdev;
651 	struct tagged_addr *fault_phys_addr;
652 	struct kbase_fault *fault;
653 	u64 fault_pfn, pfn_offset;
654 	int as_no;
655 	u64 dirty_pgds = 0;
656 
657 	as_no = faulting_as->number;
658 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
659 	fault = &faulting_as->pf_data;
660 	fault_pfn = fault->addr >> PAGE_SHIFT;
661 
662 	kbase_gpu_vm_lock(kctx);
663 
664 	/* Find region and check if it should be writable. */
665 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
666 			fault->addr);
667 	if (kbase_is_region_invalid_or_free(region)) {
668 		kbase_gpu_vm_unlock(kctx);
669 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
670 				"Memory is not mapped on the GPU",
671 				&faulting_as->pf_data);
672 		return;
673 	}
674 
675 	if (!(region->flags & KBASE_REG_GPU_WR)) {
676 		kbase_gpu_vm_unlock(kctx);
677 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
678 				"Region does not have write permissions",
679 				&faulting_as->pf_data);
680 		return;
681 	}
682 
683 	pfn_offset = fault_pfn - region->start_pfn;
684 	fault_phys_addr = &kbase_get_gpu_phy_pages(region)[pfn_offset];
685 
686 	/* Capture addresses of faulting write location
687 	 * for job dumping if write tracking is enabled.
688 	 */
689 	if (kctx->gwt_enabled) {
690 		u64 fault_page_addr = fault->addr & PAGE_MASK;
691 		bool found = false;
692 		/* Check if this write was already handled. */
693 		list_for_each_entry(pos, &kctx->gwt_current_list, link) {
694 			if (fault_page_addr == pos->page_addr) {
695 				found = true;
696 				break;
697 			}
698 		}
699 
700 		if (!found) {
701 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
702 			if (pos) {
703 				pos->region = region;
704 				set_gwt_element_page_addr_and_size(pos,
705 					fault_page_addr, *fault_phys_addr);
706 				list_add(&pos->link, &kctx->gwt_current_list);
707 			} else {
708 				dev_warn(kbdev->dev, "kmalloc failure");
709 			}
710 		}
711 	}
712 
713 	/* Now make this faulting page writable to GPU. */
714 	kbase_mmu_update_pages_no_flush(kbdev, &kctx->mmu, fault_pfn, fault_phys_addr, 1,
715 					region->flags, region->gpu_alloc->group_id, &dirty_pgds);
716 
717 	kbase_gpu_mmu_handle_write_faulting_as(kbdev, faulting_as, fault_pfn, 1,
718 					       kctx->id, dirty_pgds);
719 
720 	kbase_gpu_vm_unlock(kctx);
721 }
722 
kbase_gpu_mmu_handle_permission_fault(struct kbase_context * kctx,struct kbase_as * faulting_as)723 static void kbase_gpu_mmu_handle_permission_fault(struct kbase_context *kctx,
724 			struct kbase_as	*faulting_as)
725 {
726 	struct kbase_fault *fault = &faulting_as->pf_data;
727 
728 	switch (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault->status)) {
729 	case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC:
730 	case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
731 		kbase_gpu_mmu_handle_write_fault(kctx, faulting_as);
732 		break;
733 	case AS_FAULTSTATUS_ACCESS_TYPE_EX:
734 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
735 				"Execute Permission fault", fault);
736 		break;
737 	case AS_FAULTSTATUS_ACCESS_TYPE_READ:
738 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
739 				"Read Permission fault", fault);
740 		break;
741 	default:
742 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
743 				"Unknown Permission fault", fault);
744 		break;
745 	}
746 }
747 #endif
748 
749 /**
750  * estimate_pool_space_required - Determine how much a pool should be grown by to support a future
751  * allocation
752  * @pool:           The memory pool to check, including its linked pools
753  * @pages_required: Number of 4KiB pages require for the pool to support a future allocation
754  *
755  * The value returned is accounting for the size of @pool and the size of each memory pool linked to
756  * @pool. Hence, the caller should use @pool and (if not already satisfied) all its linked pools to
757  * allocate from.
758  *
759  * Note: this is only an estimate, because even during the calculation the memory pool(s) involved
760  * can be updated to be larger or smaller. Hence, the result is only a guide as to whether an
761  * allocation could succeed, or an estimate of the correct amount to grow the pool by. The caller
762  * should keep attempting an allocation and then re-growing with a new value queried form this
763  * function until the allocation succeeds.
764  *
765  * Return: an estimate of the amount of extra 4KiB pages in @pool that are required to satisfy an
766  * allocation, or 0 if @pool (including its linked pools) is likely to already satisfy the
767  * allocation.
768  */
estimate_pool_space_required(struct kbase_mem_pool * pool,const size_t pages_required)769 static size_t estimate_pool_space_required(struct kbase_mem_pool *pool, const size_t pages_required)
770 {
771 	size_t pages_still_required;
772 
773 	for (pages_still_required = pages_required; pool != NULL && pages_still_required;
774 	     pool = pool->next_pool) {
775 		size_t pool_size_4k;
776 
777 		kbase_mem_pool_lock(pool);
778 
779 		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
780 		if (pool_size_4k >= pages_still_required)
781 			pages_still_required = 0;
782 		else
783 			pages_still_required -= pool_size_4k;
784 
785 		kbase_mem_pool_unlock(pool);
786 	}
787 	return pages_still_required;
788 }
789 
790 /**
791  * page_fault_try_alloc - Try to allocate memory from a context pool
792  * @kctx:          Context pointer
793  * @region:        Region to grow
794  * @new_pages:     Number of 4 KiB pages to allocate
795  * @pages_to_grow: Pointer to variable to store number of outstanding pages on failure. This can be
796  *                 either 4 KiB or 2 MiB pages, depending on the number of pages requested.
797  * @grow_2mb_pool: Pointer to variable to store which pool needs to grow - true for 2 MiB, false for
798  *                 4 KiB.
799  * @prealloc_sas:  Pointer to kbase_sub_alloc structures
800  *
801  * This function will try to allocate as many pages as possible from the context pool, then if
802  * required will try to allocate the remaining pages from the device pool.
803  *
804  * This function will not allocate any new memory beyond that is already present in the context or
805  * device pools. This is because it is intended to be called whilst the thread has acquired the
806  * region list lock with kbase_gpu_vm_lock(), and a large enough memory allocation whilst that is
807  * held could invoke the OoM killer and cause an effective deadlock with kbase_cpu_vm_close().
808  *
809  * If 2 MiB pages are enabled and new_pages is >= 2 MiB then pages_to_grow will be a count of 2 MiB
810  * pages, otherwise it will be a count of 4 KiB pages.
811  *
812  * Return: true if successful, false on failure
813  */
page_fault_try_alloc(struct kbase_context * kctx,struct kbase_va_region * region,size_t new_pages,int * pages_to_grow,bool * grow_2mb_pool,struct kbase_sub_alloc ** prealloc_sas)814 static bool page_fault_try_alloc(struct kbase_context *kctx,
815 		struct kbase_va_region *region, size_t new_pages,
816 		int *pages_to_grow, bool *grow_2mb_pool,
817 		struct kbase_sub_alloc **prealloc_sas)
818 {
819 	size_t total_gpu_pages_alloced = 0;
820 	size_t total_cpu_pages_alloced = 0;
821 	struct kbase_mem_pool *pool, *root_pool;
822 	bool alloc_failed = false;
823 	size_t pages_still_required;
824 	size_t total_mempools_free_4k = 0;
825 
826 	lockdep_assert_held(&kctx->reg_lock);
827 	lockdep_assert_held(&kctx->mem_partials_lock);
828 
829 	if (WARN_ON(region->gpu_alloc->group_id >=
830 		MEMORY_GROUP_MANAGER_NR_GROUPS)) {
831 		/* Do not try to grow the memory pool */
832 		*pages_to_grow = 0;
833 		return false;
834 	}
835 
836 	if (kctx->kbdev->pagesize_2mb && new_pages >= (SZ_2M / SZ_4K)) {
837 		root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
838 		*grow_2mb_pool = true;
839 	} else {
840 		root_pool = &kctx->mem_pools.small[region->gpu_alloc->group_id];
841 		*grow_2mb_pool = false;
842 	}
843 
844 	if (region->gpu_alloc != region->cpu_alloc)
845 		new_pages *= 2;
846 
847 	/* Determine how many pages are in the pools before trying to allocate.
848 	 * Don't attempt to allocate & free if the allocation can't succeed.
849 	 */
850 	pages_still_required = estimate_pool_space_required(root_pool, new_pages);
851 
852 	if (pages_still_required) {
853 		/* Insufficient pages in pools. Don't try to allocate - just
854 		 * request a grow.
855 		 */
856 		*pages_to_grow = pages_still_required;
857 
858 		return false;
859 	}
860 
861 	/* Since we're not holding any of the mempool locks, the amount of memory in the pools may
862 	 * change between the above estimate and the actual allocation.
863 	 */
864 	pages_still_required = new_pages;
865 	for (pool = root_pool; pool != NULL && pages_still_required; pool = pool->next_pool) {
866 		size_t pool_size_4k;
867 		size_t pages_to_alloc_4k;
868 		size_t pages_to_alloc_4k_per_alloc;
869 
870 		kbase_mem_pool_lock(pool);
871 
872 		/* Allocate as much as possible from this pool*/
873 		pool_size_4k = kbase_mem_pool_size(pool) << pool->order;
874 		total_mempools_free_4k += pool_size_4k;
875 		pages_to_alloc_4k = MIN(pages_still_required, pool_size_4k);
876 		if (region->gpu_alloc == region->cpu_alloc)
877 			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k;
878 		else
879 			pages_to_alloc_4k_per_alloc = pages_to_alloc_4k >> 1;
880 
881 		if (pages_to_alloc_4k) {
882 			struct tagged_addr *gpu_pages =
883 				kbase_alloc_phy_pages_helper_locked(region->gpu_alloc, pool,
884 								    pages_to_alloc_4k_per_alloc,
885 								    &prealloc_sas[0]);
886 
887 			if (!gpu_pages)
888 				alloc_failed = true;
889 			else
890 				total_gpu_pages_alloced += pages_to_alloc_4k_per_alloc;
891 
892 			if (!alloc_failed && region->gpu_alloc != region->cpu_alloc) {
893 				struct tagged_addr *cpu_pages = kbase_alloc_phy_pages_helper_locked(
894 					region->cpu_alloc, pool, pages_to_alloc_4k_per_alloc,
895 					&prealloc_sas[1]);
896 
897 				if (!cpu_pages)
898 					alloc_failed = true;
899 				else
900 					total_cpu_pages_alloced += pages_to_alloc_4k_per_alloc;
901 			}
902 		}
903 
904 		kbase_mem_pool_unlock(pool);
905 
906 		if (alloc_failed) {
907 			WARN_ON(!pages_still_required);
908 			WARN_ON(pages_to_alloc_4k >= pages_still_required);
909 			WARN_ON(pages_to_alloc_4k_per_alloc >= pages_still_required);
910 			break;
911 		}
912 
913 		pages_still_required -= pages_to_alloc_4k;
914 	}
915 
916 	if (pages_still_required) {
917 		/* Allocation was unsuccessful. We have dropped the mem_pool lock after allocation,
918 		 * so must in any case use kbase_free_phy_pages_helper() rather than
919 		 * kbase_free_phy_pages_helper_locked()
920 		 */
921 		if (total_gpu_pages_alloced > 0)
922 			kbase_free_phy_pages_helper(region->gpu_alloc, total_gpu_pages_alloced);
923 		if (region->gpu_alloc != region->cpu_alloc && total_cpu_pages_alloced > 0)
924 			kbase_free_phy_pages_helper(region->cpu_alloc, total_cpu_pages_alloced);
925 
926 		if (alloc_failed) {
927 			/* Note that in allocating from the above memory pools, we always ensure
928 			 * never to request more than is available in each pool with the pool's
929 			 * lock held. Hence failing to allocate in such situations would be unusual
930 			 * and we should cancel the growth instead (as re-growing the memory pool
931 			 * might not fix the situation)
932 			 */
933 			dev_warn(
934 				kctx->kbdev->dev,
935 				"Page allocation failure of %zu pages: managed %zu pages, mempool (inc linked pools) had %zu pages available",
936 				new_pages, total_gpu_pages_alloced + total_cpu_pages_alloced,
937 				total_mempools_free_4k);
938 			*pages_to_grow = 0;
939 		} else {
940 			/* Tell the caller to try to grow the memory pool
941 			 *
942 			 * Freeing pages above may have spilled or returned them to the OS, so we
943 			 * have to take into account how many are still in the pool before giving a
944 			 * new estimate for growth required of the pool. We can just re-estimate a
945 			 * new value.
946 			 */
947 			pages_still_required = estimate_pool_space_required(root_pool, new_pages);
948 			if (pages_still_required) {
949 				*pages_to_grow = pages_still_required;
950 			} else {
951 				/* It's possible another thread could've grown the pool to be just
952 				 * big enough after we rolled back the allocation. Request at least
953 				 * one more page to ensure the caller doesn't fail the growth by
954 				 * conflating it with the alloc_failed case above
955 				 */
956 				*pages_to_grow = 1u;
957 			}
958 		}
959 
960 		return false;
961 	}
962 
963 	/* Allocation was successful. No pages to grow, return success. */
964 	*pages_to_grow = 0;
965 
966 	return true;
967 }
968 
kbase_mmu_page_fault_worker(struct work_struct * data)969 void kbase_mmu_page_fault_worker(struct work_struct *data)
970 {
971 	u64 fault_pfn;
972 	u32 fault_status;
973 	size_t new_pages;
974 	size_t fault_rel_pfn;
975 	struct kbase_as *faulting_as;
976 	int as_no;
977 	struct kbase_context *kctx;
978 	struct kbase_device *kbdev;
979 	struct kbase_va_region *region;
980 	struct kbase_fault *fault;
981 	int err;
982 	bool grown = false;
983 	int pages_to_grow;
984 	bool grow_2mb_pool;
985 	struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
986 	int i;
987 	size_t current_backed_size;
988 #if MALI_JIT_PRESSURE_LIMIT_BASE
989 	size_t pages_trimmed = 0;
990 #endif
991 
992 	/* Calls to this function are inherently synchronous, with respect to
993 	 * MMU operations.
994 	 */
995 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_SYNC;
996 
997 	faulting_as = container_of(data, struct kbase_as, work_pagefault);
998 	fault = &faulting_as->pf_data;
999 	fault_pfn = fault->addr >> PAGE_SHIFT;
1000 	as_no = faulting_as->number;
1001 
1002 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
1003 	dev_dbg(kbdev->dev, "Entering %s %pK, fault_pfn %lld, as_no %d", __func__, (void *)data,
1004 		fault_pfn, as_no);
1005 
1006 	/* Grab the context that was already refcounted in kbase_mmu_interrupt()
1007 	 * Therefore, it cannot be scheduled out of this AS until we explicitly
1008 	 * release it
1009 	 */
1010 	kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
1011 	if (!kctx) {
1012 		atomic_dec(&kbdev->faults_pending);
1013 		return;
1014 	}
1015 
1016 	KBASE_DEBUG_ASSERT(kctx->kbdev == kbdev);
1017 
1018 #if MALI_JIT_PRESSURE_LIMIT_BASE
1019 #if !MALI_USE_CSF
1020 	mutex_lock(&kctx->jctx.lock);
1021 #endif
1022 #endif
1023 
1024 #ifdef CONFIG_MALI_ARBITER_SUPPORT
1025 	/* check if we still have GPU */
1026 	if (unlikely(kbase_is_gpu_removed(kbdev))) {
1027 		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
1028 		goto fault_done;
1029 	}
1030 #endif
1031 
1032 	if (unlikely(fault->protected_mode)) {
1033 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1034 				"Protected mode fault", fault);
1035 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
1036 				KBASE_MMU_FAULT_TYPE_PAGE);
1037 
1038 		goto fault_done;
1039 	}
1040 
1041 	fault_status = fault->status;
1042 	switch (fault_status & AS_FAULTSTATUS_EXCEPTION_CODE_MASK) {
1043 
1044 	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSLATION_FAULT:
1045 		/* need to check against the region to handle this one */
1046 		break;
1047 
1048 	case AS_FAULTSTATUS_EXCEPTION_CODE_PERMISSION_FAULT:
1049 #ifdef CONFIG_MALI_CINSTR_GWT
1050 		/* If GWT was ever enabled then we need to handle
1051 		 * write fault pages even if the feature was disabled later.
1052 		 */
1053 		if (kctx->gwt_was_enabled) {
1054 			kbase_gpu_mmu_handle_permission_fault(kctx,
1055 							faulting_as);
1056 			goto fault_done;
1057 		}
1058 #endif
1059 
1060 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1061 				"Permission failure", fault);
1062 		goto fault_done;
1063 
1064 	case AS_FAULTSTATUS_EXCEPTION_CODE_TRANSTAB_BUS_FAULT:
1065 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1066 				"Translation table bus fault", fault);
1067 		goto fault_done;
1068 
1069 	case AS_FAULTSTATUS_EXCEPTION_CODE_ACCESS_FLAG:
1070 		/* nothing to do, but we don't expect this fault currently */
1071 		dev_warn(kbdev->dev, "Access flag unexpectedly set");
1072 		goto fault_done;
1073 
1074 	case AS_FAULTSTATUS_EXCEPTION_CODE_ADDRESS_SIZE_FAULT:
1075 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1076 				"Address size fault", fault);
1077 		goto fault_done;
1078 
1079 	case AS_FAULTSTATUS_EXCEPTION_CODE_MEMORY_ATTRIBUTES_FAULT:
1080 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1081 				"Memory attributes fault", fault);
1082 		goto fault_done;
1083 
1084 	default:
1085 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1086 				"Unknown fault code", fault);
1087 		goto fault_done;
1088 	}
1089 
1090 page_fault_retry:
1091 	if (kbdev->pagesize_2mb) {
1092 		/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
1093 		for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
1094 			if (!prealloc_sas[i]) {
1095 				prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
1096 
1097 				if (!prealloc_sas[i]) {
1098 					kbase_mmu_report_fault_and_kill(
1099 						kctx, faulting_as,
1100 						"Failed pre-allocating memory for sub-allocations' metadata",
1101 						fault);
1102 					goto fault_done;
1103 				}
1104 			}
1105 		}
1106 	}
1107 
1108 	/* so we have a translation fault,
1109 	 * let's see if it is for growable memory
1110 	 */
1111 	kbase_gpu_vm_lock(kctx);
1112 
1113 	region = kbase_region_tracker_find_region_enclosing_address(kctx,
1114 			fault->addr);
1115 	if (kbase_is_region_invalid_or_free(region)) {
1116 		kbase_gpu_vm_unlock(kctx);
1117 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1118 				"Memory is not mapped on the GPU", fault);
1119 		goto fault_done;
1120 	}
1121 
1122 	if (region->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
1123 		kbase_gpu_vm_unlock(kctx);
1124 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1125 				"DMA-BUF is not mapped on the GPU", fault);
1126 		goto fault_done;
1127 	}
1128 
1129 	if (region->gpu_alloc->group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) {
1130 		kbase_gpu_vm_unlock(kctx);
1131 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1132 				"Bad physical memory group ID", fault);
1133 		goto fault_done;
1134 	}
1135 
1136 	if ((region->flags & GROWABLE_FLAGS_REQUIRED)
1137 			!= GROWABLE_FLAGS_REQUIRED) {
1138 		kbase_gpu_vm_unlock(kctx);
1139 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1140 				"Memory is not growable", fault);
1141 		goto fault_done;
1142 	}
1143 
1144 	if ((region->flags & KBASE_REG_DONT_NEED)) {
1145 		kbase_gpu_vm_unlock(kctx);
1146 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1147 				"Don't need memory can't be grown", fault);
1148 		goto fault_done;
1149 	}
1150 
1151 	if (AS_FAULTSTATUS_ACCESS_TYPE_GET(fault_status) ==
1152 		AS_FAULTSTATUS_ACCESS_TYPE_READ)
1153 		dev_warn(kbdev->dev, "Grow on pagefault while reading");
1154 
1155 	/* find the size we need to grow it by
1156 	 * we know the result fit in a size_t due to
1157 	 * kbase_region_tracker_find_region_enclosing_address
1158 	 * validating the fault_address to be within a size_t from the start_pfn
1159 	 */
1160 	fault_rel_pfn = fault_pfn - region->start_pfn;
1161 
1162 	current_backed_size = kbase_reg_current_backed_size(region);
1163 
1164 	if (fault_rel_pfn < current_backed_size) {
1165 		struct kbase_mmu_hw_op_param op_param;
1166 
1167 		dev_dbg(kbdev->dev,
1168 			"Page fault @ 0x%llx in allocated region 0x%llx-0x%llx of growable TMEM: Ignoring",
1169 				fault->addr, region->start_pfn,
1170 				region->start_pfn +
1171 				current_backed_size);
1172 
1173 		mutex_lock(&kbdev->mmu_hw_mutex);
1174 
1175 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
1176 				KBASE_MMU_FAULT_TYPE_PAGE);
1177 		/* [1] in case another page fault occurred while we were
1178 		 * handling the (duplicate) page fault we need to ensure we
1179 		 * don't loose the other page fault as result of us clearing
1180 		 * the MMU IRQ. Therefore, after we clear the MMU IRQ we send
1181 		 * an UNLOCK command that will retry any stalled memory
1182 		 * transaction (which should cause the other page fault to be
1183 		 * raised again).
1184 		 */
1185 		op_param.mmu_sync_info = mmu_sync_info;
1186 		op_param.kctx_id = kctx->id;
1187 		if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
1188 			mmu_hw_operation_begin(kbdev);
1189 			err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
1190 							     &op_param);
1191 			mmu_hw_operation_end(kbdev);
1192 		} else {
1193 			/* Can safely skip the invalidate for all levels in case
1194 			 * of duplicate page faults.
1195 			 */
1196 			op_param.flush_skip_levels = 0xF;
1197 			op_param.vpfn = fault_pfn;
1198 			op_param.nr = 1;
1199 			err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
1200 						     &op_param);
1201 		}
1202 
1203 		if (err) {
1204 			dev_err(kbdev->dev,
1205 				"Invalidation for MMU did not complete on handling page fault @ 0x%llx",
1206 				fault->addr);
1207 		}
1208 
1209 		mutex_unlock(&kbdev->mmu_hw_mutex);
1210 
1211 		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
1212 				KBASE_MMU_FAULT_TYPE_PAGE);
1213 		kbase_gpu_vm_unlock(kctx);
1214 
1215 		goto fault_done;
1216 	}
1217 
1218 	new_pages = reg_grow_calc_extra_pages(kbdev, region, fault_rel_pfn);
1219 
1220 	/* cap to max vsize */
1221 	new_pages = min(new_pages, region->nr_pages - current_backed_size);
1222 	dev_dbg(kctx->kbdev->dev, "Allocate %zu pages on page fault", new_pages);
1223 
1224 	if (new_pages == 0) {
1225 		struct kbase_mmu_hw_op_param op_param;
1226 
1227 		mutex_lock(&kbdev->mmu_hw_mutex);
1228 
1229 		/* Duplicate of a fault we've already handled, nothing to do */
1230 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
1231 				KBASE_MMU_FAULT_TYPE_PAGE);
1232 
1233 		/* See comment [1] about UNLOCK usage */
1234 		op_param.mmu_sync_info = mmu_sync_info;
1235 		op_param.kctx_id = kctx->id;
1236 		if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
1237 			mmu_hw_operation_begin(kbdev);
1238 			err = kbase_mmu_hw_do_unlock_no_addr(kbdev, faulting_as,
1239 							     &op_param);
1240 			mmu_hw_operation_end(kbdev);
1241 		} else {
1242 			/* Can safely skip the invalidate for all levels in case
1243 			 * of duplicate page faults.
1244 			 */
1245 			op_param.flush_skip_levels = 0xF;
1246 			op_param.vpfn = fault_pfn;
1247 			op_param.nr = 1;
1248 			err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
1249 						     &op_param);
1250 		}
1251 
1252 		if (err) {
1253 			dev_err(kbdev->dev,
1254 				"Invalidation for MMU did not complete on handling page fault @ 0x%llx",
1255 				fault->addr);
1256 		}
1257 
1258 		mutex_unlock(&kbdev->mmu_hw_mutex);
1259 
1260 		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
1261 				KBASE_MMU_FAULT_TYPE_PAGE);
1262 		kbase_gpu_vm_unlock(kctx);
1263 		goto fault_done;
1264 	}
1265 
1266 	pages_to_grow = 0;
1267 
1268 #if MALI_JIT_PRESSURE_LIMIT_BASE
1269 	if ((region->flags & KBASE_REG_ACTIVE_JIT_ALLOC) && !pages_trimmed) {
1270 		kbase_jit_request_phys_increase(kctx, new_pages);
1271 		pages_trimmed = new_pages;
1272 	}
1273 #endif
1274 
1275 	spin_lock(&kctx->mem_partials_lock);
1276 	grown = page_fault_try_alloc(kctx, region, new_pages, &pages_to_grow,
1277 			&grow_2mb_pool, prealloc_sas);
1278 	spin_unlock(&kctx->mem_partials_lock);
1279 
1280 	if (grown) {
1281 		u64 dirty_pgds = 0;
1282 		u64 pfn_offset;
1283 		struct kbase_mmu_hw_op_param op_param;
1284 
1285 		/* alloc success */
1286 		WARN_ON(kbase_reg_current_backed_size(region) >
1287 			region->nr_pages);
1288 
1289 		/* set up the new pages */
1290 		pfn_offset = kbase_reg_current_backed_size(region) - new_pages;
1291 		/*
1292 		 * Note:
1293 		 * Issuing an MMU operation will unlock the MMU and cause the
1294 		 * translation to be replayed. If the page insertion fails then
1295 		 * rather then trying to continue the context should be killed
1296 		 * so the no_flush version of insert_pages is used which allows
1297 		 * us to unlock the MMU as we see fit.
1298 		 */
1299 		err = kbase_mmu_insert_pages_no_flush(
1300 			kbdev, &kctx->mmu, region->start_pfn + pfn_offset,
1301 			&kbase_get_gpu_phy_pages(region)[pfn_offset], new_pages, region->flags,
1302 			region->gpu_alloc->group_id, &dirty_pgds, region, false);
1303 		if (err) {
1304 			kbase_free_phy_pages_helper(region->gpu_alloc,
1305 					new_pages);
1306 			if (region->gpu_alloc != region->cpu_alloc)
1307 				kbase_free_phy_pages_helper(region->cpu_alloc,
1308 						new_pages);
1309 			kbase_gpu_vm_unlock(kctx);
1310 			/* The locked VA region will be unlocked and the cache
1311 			 * invalidated in here
1312 			 */
1313 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1314 					"Page table update failure", fault);
1315 			goto fault_done;
1316 		}
1317 		KBASE_TLSTREAM_AUX_PAGEFAULT(kbdev, kctx->id, as_no,
1318 				(u64)new_pages);
1319 		trace_mali_mmu_page_fault_grow(region, fault, new_pages);
1320 
1321 #if MALI_INCREMENTAL_RENDERING_JM
1322 		/* Switch to incremental rendering if we have nearly run out of
1323 		 * memory in a JIT memory allocation.
1324 		 */
1325 		if (region->threshold_pages &&
1326 			kbase_reg_current_backed_size(region) >
1327 				region->threshold_pages) {
1328 			dev_dbg(kctx->kbdev->dev, "%zu pages exceeded IR threshold %zu",
1329 				new_pages + current_backed_size, region->threshold_pages);
1330 
1331 			if (kbase_mmu_switch_to_ir(kctx, region) >= 0) {
1332 				dev_dbg(kctx->kbdev->dev, "Get region %pK for IR", (void *)region);
1333 				kbase_va_region_alloc_get(kctx, region);
1334 			}
1335 		}
1336 #endif
1337 
1338 		/* AS transaction begin */
1339 		mutex_lock(&kbdev->mmu_hw_mutex);
1340 
1341 		/* clear MMU interrupt - this needs to be done after updating
1342 		 * the page tables but before issuing a FLUSH command. The
1343 		 * FLUSH cmd has a side effect that it restarts stalled memory
1344 		 * transactions in other address spaces which may cause
1345 		 * another fault to occur. If we didn't clear the interrupt at
1346 		 * this stage a new IRQ might not be raised when the GPU finds
1347 		 * a MMU IRQ is already pending.
1348 		 */
1349 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
1350 					 KBASE_MMU_FAULT_TYPE_PAGE);
1351 
1352 		op_param.vpfn = region->start_pfn + pfn_offset;
1353 		op_param.nr = new_pages;
1354 		op_param.op = KBASE_MMU_OP_FLUSH_PT;
1355 		op_param.kctx_id = kctx->id;
1356 		op_param.mmu_sync_info = mmu_sync_info;
1357 		if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
1358 			/* Unlock to invalidate the TLB (and resume the MMU) */
1359 			op_param.flush_skip_levels =
1360 				pgd_level_to_skip_flush(dirty_pgds);
1361 			err = kbase_mmu_hw_do_unlock(kbdev, faulting_as,
1362 						     &op_param);
1363 		} else {
1364 			/* flush L2 and unlock the VA (resumes the MMU) */
1365 			mmu_hw_operation_begin(kbdev);
1366 			err = kbase_mmu_hw_do_flush(kbdev, faulting_as,
1367 						    &op_param);
1368 			mmu_hw_operation_end(kbdev);
1369 		}
1370 
1371 		if (err) {
1372 			dev_err(kbdev->dev,
1373 				"Flush for GPU page table update did not complete on handling page fault @ 0x%llx",
1374 				fault->addr);
1375 		}
1376 
1377 		mutex_unlock(&kbdev->mmu_hw_mutex);
1378 		/* AS transaction end */
1379 
1380 		/* reenable this in the mask */
1381 		kbase_mmu_hw_enable_fault(kbdev, faulting_as,
1382 					 KBASE_MMU_FAULT_TYPE_PAGE);
1383 
1384 #ifdef CONFIG_MALI_CINSTR_GWT
1385 		if (kctx->gwt_enabled) {
1386 			/* GWT also tracks growable regions. */
1387 			struct kbasep_gwt_list_element *pos;
1388 
1389 			pos = kmalloc(sizeof(*pos), GFP_KERNEL);
1390 			if (pos) {
1391 				pos->region = region;
1392 				pos->page_addr = (region->start_pfn +
1393 							pfn_offset) <<
1394 							 PAGE_SHIFT;
1395 				pos->num_pages = new_pages;
1396 				list_add(&pos->link,
1397 					&kctx->gwt_current_list);
1398 			} else {
1399 				dev_warn(kbdev->dev, "kmalloc failure");
1400 			}
1401 		}
1402 #endif
1403 
1404 #if MALI_JIT_PRESSURE_LIMIT_BASE
1405 		if (pages_trimmed) {
1406 			kbase_jit_done_phys_increase(kctx, pages_trimmed);
1407 			pages_trimmed = 0;
1408 		}
1409 #endif
1410 		kbase_gpu_vm_unlock(kctx);
1411 	} else {
1412 		int ret = -ENOMEM;
1413 
1414 		kbase_gpu_vm_unlock(kctx);
1415 
1416 		/* If the memory pool was insufficient then grow it and retry.
1417 		 * Otherwise fail the allocation.
1418 		 */
1419 		if (pages_to_grow > 0) {
1420 			if (kbdev->pagesize_2mb && grow_2mb_pool) {
1421 				/* Round page requirement up to nearest 2 MB */
1422 				struct kbase_mem_pool *const lp_mem_pool =
1423 					&kctx->mem_pools.large[
1424 					region->gpu_alloc->group_id];
1425 
1426 				pages_to_grow = (pages_to_grow +
1427 					((1 << lp_mem_pool->order) - 1))
1428 						>> lp_mem_pool->order;
1429 
1430 				ret = kbase_mem_pool_grow(lp_mem_pool,
1431 					pages_to_grow, kctx->task);
1432 			} else {
1433 				struct kbase_mem_pool *const mem_pool =
1434 					&kctx->mem_pools.small[
1435 					region->gpu_alloc->group_id];
1436 
1437 				ret = kbase_mem_pool_grow(mem_pool,
1438 					pages_to_grow, kctx->task);
1439 			}
1440 		}
1441 		if (ret < 0) {
1442 			/* failed to extend, handle as a normal PF */
1443 			kbase_mmu_report_fault_and_kill(kctx, faulting_as,
1444 					"Page allocation failure", fault);
1445 		} else {
1446 			dev_dbg(kbdev->dev, "Try again after pool_grow");
1447 			goto page_fault_retry;
1448 		}
1449 	}
1450 
1451 fault_done:
1452 #if MALI_JIT_PRESSURE_LIMIT_BASE
1453 	if (pages_trimmed) {
1454 		kbase_gpu_vm_lock(kctx);
1455 		kbase_jit_done_phys_increase(kctx, pages_trimmed);
1456 		kbase_gpu_vm_unlock(kctx);
1457 	}
1458 #if !MALI_USE_CSF
1459 	mutex_unlock(&kctx->jctx.lock);
1460 #endif
1461 #endif
1462 
1463 	for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i)
1464 		kfree(prealloc_sas[i]);
1465 
1466 	/*
1467 	 * By this point, the fault was handled in some way,
1468 	 * so release the ctx refcount
1469 	 */
1470 	release_ctx(kbdev, kctx);
1471 
1472 	atomic_dec(&kbdev->faults_pending);
1473 	dev_dbg(kbdev->dev, "Leaving page_fault_worker %pK", (void *)data);
1474 }
1475 
kbase_mmu_alloc_pgd(struct kbase_device * kbdev,struct kbase_mmu_table * mmut)1476 static phys_addr_t kbase_mmu_alloc_pgd(struct kbase_device *kbdev,
1477 		struct kbase_mmu_table *mmut)
1478 {
1479 	u64 *page;
1480 	struct page *p;
1481 	phys_addr_t pgd;
1482 
1483 	p = kbase_mem_pool_alloc(&kbdev->mem_pools.small[mmut->group_id]);
1484 	if (!p)
1485 		return KBASE_MMU_INVALID_PGD_ADDRESS;
1486 
1487 	page = kmap(p);
1488 	if (page == NULL)
1489 		goto alloc_free;
1490 
1491 	pgd = page_to_phys(p);
1492 
1493 	/* If the MMU tables belong to a context then account the memory usage
1494 	 * to that context, otherwise the MMU tables are device wide and are
1495 	 * only accounted to the device.
1496 	 */
1497 	if (mmut->kctx) {
1498 		int new_page_count;
1499 
1500 		new_page_count = atomic_add_return(1,
1501 			&mmut->kctx->used_pages);
1502 		KBASE_TLSTREAM_AUX_PAGESALLOC(
1503 			kbdev,
1504 			mmut->kctx->id,
1505 			(u64)new_page_count);
1506 		kbase_process_page_usage_inc(mmut->kctx, 1);
1507 	}
1508 
1509 	atomic_add(1, &kbdev->memdev.used_pages);
1510 
1511 	kbase_trace_gpu_mem_usage_inc(kbdev, mmut->kctx, 1);
1512 
1513 	kbdev->mmu_mode->entries_invalidate(page, KBASE_MMU_PAGE_ENTRIES);
1514 
1515 	/* As this page is newly created, therefore there is no content to
1516 	 * clean or invalidate in the GPU caches.
1517 	 */
1518 	kbase_mmu_sync_pgd_cpu(kbdev, kbase_dma_addr(p), PAGE_SIZE);
1519 
1520 	kunmap(p);
1521 	return pgd;
1522 
1523 alloc_free:
1524 	kbase_mem_pool_free(&kbdev->mem_pools.small[mmut->group_id], p, false);
1525 
1526 	return KBASE_MMU_INVALID_PGD_ADDRESS;
1527 }
1528 
1529 /**
1530  * mmu_get_next_pgd() - Given PGD PFN for level N, return PGD PFN for level N+1
1531  *
1532  * @kbdev:    Device pointer.
1533  * @mmut:     GPU MMU page table.
1534  * @pgd:      Physical addresse of level N page directory.
1535  * @vpfn:     The virtual page frame number.
1536  * @level:    The level of MMU page table (N).
1537  *
1538  * Return:
1539  * * 0 - OK
1540  * * -EFAULT - level N+1 PGD does not exist
1541  * * -EINVAL - kmap() failed for level N PGD PFN
1542  */
mmu_get_next_pgd(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,phys_addr_t * pgd,u64 vpfn,int level)1543 static int mmu_get_next_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1544 			    phys_addr_t *pgd, u64 vpfn, int level)
1545 {
1546 	u64 *page;
1547 	phys_addr_t target_pgd;
1548 	struct page *p;
1549 
1550 	lockdep_assert_held(&mmut->mmu_lock);
1551 
1552 	/*
1553 	 * Architecture spec defines level-0 as being the top-most.
1554 	 * This is a bit unfortunate here, but we keep the same convention.
1555 	 */
1556 	vpfn >>= (3 - level) * 9;
1557 	vpfn &= 0x1FF;
1558 
1559 	p = pfn_to_page(PFN_DOWN(*pgd));
1560 	page = kmap(p);
1561 	if (page == NULL) {
1562 		dev_err(kbdev->dev, "%s: kmap failure", __func__);
1563 		return -EINVAL;
1564 	}
1565 
1566 	if (!kbdev->mmu_mode->pte_is_valid(page[vpfn], level)) {
1567 		dev_dbg(kbdev->dev, "%s: invalid PTE at level %d vpfn 0x%llx", __func__, level,
1568 			vpfn);
1569 		kunmap(p);
1570 		return -EFAULT;
1571 	} else {
1572 		target_pgd = kbdev->mmu_mode->pte_to_phy_addr(
1573 			kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
1574 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[vpfn]));
1575 	}
1576 
1577 	kunmap(p);
1578 	*pgd = target_pgd;
1579 
1580 	return 0;
1581 }
1582 
1583 /**
1584  * mmu_get_lowest_valid_pgd() - Find a valid PGD at or closest to in_level
1585  *
1586  * @kbdev:    Device pointer.
1587  * @mmut:     GPU MMU page table.
1588  * @vpfn:     The virtual page frame number.
1589  * @in_level:     The level of MMU page table (N).
1590  * @out_level:    Set to the level of the lowest valid PGD found on success.
1591  *                Invalid on error.
1592  * @out_pgd:      Set to the lowest valid PGD found on success.
1593  *                Invalid on error.
1594  *
1595  * Does a page table walk starting from top level (L0) to in_level to find a valid PGD at or
1596  * closest to in_level
1597  *
1598  * Terminology:
1599  * Level-0 = Top-level = highest
1600  * Level-3 = Bottom-level = lowest
1601  *
1602  * Return:
1603  * * 0 - OK
1604  * * -EINVAL - kmap() failed during page table walk.
1605  */
mmu_get_lowest_valid_pgd(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,int in_level,int * out_level,phys_addr_t * out_pgd)1606 static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1607 				    u64 vpfn, int in_level, int *out_level, phys_addr_t *out_pgd)
1608 {
1609 	phys_addr_t pgd;
1610 	int l;
1611 	int err = 0;
1612 
1613 	lockdep_assert_held(&mmut->mmu_lock);
1614 	pgd = mmut->pgd;
1615 
1616 	for (l = MIDGARD_MMU_TOPLEVEL; l < in_level; l++) {
1617 		err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
1618 
1619 		/* Handle failure condition */
1620 		if (err) {
1621 			dev_dbg(kbdev->dev,
1622 				"%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
1623 				__func__, l + 1);
1624 			break;
1625 		}
1626 	}
1627 
1628 	*out_pgd = pgd;
1629 	*out_level = l;
1630 
1631 	/* -EFAULT indicates that pgd param was valid but the next pgd entry at vpfn was invalid.
1632 	 * This implies that we have found the lowest valid pgd. Reset the error code.
1633 	 */
1634 	if (err == -EFAULT)
1635 		err = 0;
1636 
1637 	return err;
1638 }
1639 
1640 /*
1641  * On success, sets out_pgd to the PGD for the specified level of translation
1642  * Returns -EFAULT if a valid PGD is not found
1643  */
mmu_get_pgd_at_level(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,int level,phys_addr_t * out_pgd)1644 static int mmu_get_pgd_at_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
1645 				int level, phys_addr_t *out_pgd)
1646 {
1647 	phys_addr_t pgd;
1648 	int l;
1649 
1650 	lockdep_assert_held(&mmut->mmu_lock);
1651 	pgd = mmut->pgd;
1652 
1653 	for (l = MIDGARD_MMU_TOPLEVEL; l < level; l++) {
1654 		int err = mmu_get_next_pgd(kbdev, mmut, &pgd, vpfn, l);
1655 		/* Handle failure condition */
1656 		if (err) {
1657 			dev_err(kbdev->dev,
1658 				"%s: mmu_get_next_pgd() failed to find a valid pgd at level %d",
1659 				__func__, l + 1);
1660 			return err;
1661 		}
1662 	}
1663 
1664 	*out_pgd = pgd;
1665 
1666 	return 0;
1667 }
1668 
mmu_insert_pages_failure_recovery(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 from_vpfn,u64 to_vpfn,u64 * dirty_pgds,struct tagged_addr * phys,bool ignore_page_migration)1669 static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
1670 					      struct kbase_mmu_table *mmut, u64 from_vpfn,
1671 					      u64 to_vpfn, u64 *dirty_pgds,
1672 					      struct tagged_addr *phys, bool ignore_page_migration)
1673 {
1674 	u64 vpfn = from_vpfn;
1675 	struct kbase_mmu_mode const *mmu_mode;
1676 
1677 	/* 64-bit address range is the max */
1678 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
1679 	KBASE_DEBUG_ASSERT(from_vpfn <= to_vpfn);
1680 
1681 	lockdep_assert_held(&mmut->mmu_lock);
1682 
1683 	mmu_mode = kbdev->mmu_mode;
1684 	kbase_mmu_reset_free_pgds_list(mmut);
1685 
1686 	while (vpfn < to_vpfn) {
1687 		unsigned int idx = vpfn & 0x1FF;
1688 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - idx;
1689 		unsigned int pcount = 0;
1690 		unsigned int left = to_vpfn - vpfn;
1691 		int level;
1692 		u64 *page;
1693 		phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
1694 		phys_addr_t pgd = mmut->pgd;
1695 		struct page *p = phys_to_page(pgd);
1696 
1697 		register unsigned int num_of_valid_entries;
1698 
1699 		if (count > left)
1700 			count = left;
1701 
1702 		/* need to check if this is a 2MB page or a 4kB */
1703 		for (level = MIDGARD_MMU_TOPLEVEL;
1704 				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
1705 			idx = (vpfn >> ((3 - level) * 9)) & 0x1FF;
1706 			pgds[level] = pgd;
1707 			page = kmap(p);
1708 			if (mmu_mode->ate_is_valid(page[idx], level))
1709 				break; /* keep the mapping */
1710 			kunmap(p);
1711 			pgd = mmu_mode->pte_to_phy_addr(kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
1712 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[idx]));
1713 			p = phys_to_page(pgd);
1714 		}
1715 
1716 		switch (level) {
1717 		case MIDGARD_MMU_LEVEL(2):
1718 			/* remap to single entry to update */
1719 			pcount = 1;
1720 			break;
1721 		case MIDGARD_MMU_BOTTOMLEVEL:
1722 			/* page count is the same as the logical count */
1723 			pcount = count;
1724 			break;
1725 		default:
1726 			dev_warn(kbdev->dev, "%sNo support for ATEs at level %d", __func__, level);
1727 			goto next;
1728 		}
1729 
1730 		if (dirty_pgds && pcount > 0)
1731 			*dirty_pgds |= 1ULL << level;
1732 
1733 		num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
1734 		if (WARN_ON_ONCE(num_of_valid_entries < pcount))
1735 			num_of_valid_entries = 0;
1736 		else
1737 			num_of_valid_entries -= pcount;
1738 
1739 		/* Invalidate the entries we added */
1740 		mmu_mode->entries_invalidate(&page[idx], pcount);
1741 
1742 		if (!num_of_valid_entries) {
1743 			kunmap(p);
1744 
1745 			kbase_mmu_add_to_free_pgds_list(mmut, p);
1746 
1747 			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
1748 							      KBASE_MMU_OP_NONE, dirty_pgds);
1749 			vpfn += count;
1750 			continue;
1751 		}
1752 
1753 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
1754 
1755 		/* MMU cache flush strategy is NONE because GPU cache maintenance is
1756 		 * going to be done by the caller
1757 		 */
1758 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (idx * sizeof(u64)),
1759 				   kbase_dma_addr(p) + sizeof(u64) * idx, sizeof(u64) * pcount,
1760 				   KBASE_MMU_OP_NONE);
1761 		kunmap(p);
1762 next:
1763 		vpfn += count;
1764 	}
1765 
1766 	/* If page migration is enabled: the only way to recover from failure
1767 	 * is to mark all pages as not movable. It is not predictable what's
1768 	 * going to happen to these pages at this stage. They might return
1769 	 * movable once they are returned to a memory pool.
1770 	 */
1771 	if (kbase_page_migration_enabled && !ignore_page_migration && phys) {
1772 		const u64 num_pages = to_vpfn - from_vpfn + 1;
1773 		u64 i;
1774 
1775 		for (i = 0; i < num_pages; i++) {
1776 			struct page *phys_page = as_page(phys[i]);
1777 			struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
1778 
1779 			if (page_md) {
1780 				spin_lock(&page_md->migrate_lock);
1781 				page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
1782 				spin_unlock(&page_md->migrate_lock);
1783 			}
1784 		}
1785 	}
1786 }
1787 
mmu_flush_invalidate_insert_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,const u64 vpfn,size_t nr,u64 dirty_pgds,enum kbase_caller_mmu_sync_info mmu_sync_info,bool insert_pages_failed)1788 static void mmu_flush_invalidate_insert_pages(struct kbase_device *kbdev,
1789 					      struct kbase_mmu_table *mmut, const u64 vpfn,
1790 					      size_t nr, u64 dirty_pgds,
1791 					      enum kbase_caller_mmu_sync_info mmu_sync_info,
1792 					      bool insert_pages_failed)
1793 {
1794 	struct kbase_mmu_hw_op_param op_param;
1795 	int as_nr = 0;
1796 
1797 	op_param.vpfn = vpfn;
1798 	op_param.nr = nr;
1799 	op_param.op = KBASE_MMU_OP_FLUSH_PT;
1800 	op_param.mmu_sync_info = mmu_sync_info;
1801 	op_param.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF;
1802 	op_param.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds);
1803 
1804 #if MALI_USE_CSF
1805 	as_nr = mmut->kctx ? mmut->kctx->as_nr : MCU_AS_NR;
1806 #else
1807 	WARN_ON(!mmut->kctx);
1808 #endif
1809 
1810 	/* MMU cache flush strategy depends on whether GPU control commands for
1811 	 * flushing physical address ranges are supported. The new physical pages
1812 	 * are not present in GPU caches therefore they don't need any cache
1813 	 * maintenance, but PGDs in the page table may or may not be created anew.
1814 	 *
1815 	 * Operations that affect the whole GPU cache shall only be done if it's
1816 	 * impossible to update physical ranges.
1817 	 *
1818 	 * On GPUs where flushing by physical address range is supported,
1819 	 * full cache flush is done when an error occurs during
1820 	 * insert_pages() to keep the error handling simpler.
1821 	 */
1822 	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && !insert_pages_failed)
1823 		mmu_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
1824 	else
1825 		mmu_flush_invalidate(kbdev, mmut->kctx, as_nr, &op_param);
1826 }
1827 
1828 /**
1829  * update_parent_pgds() - Updates the page table from bottom level towards
1830  *                        the top level to insert a new ATE
1831  *
1832  * @kbdev:    Device pointer.
1833  * @mmut:     GPU MMU page table.
1834  * @cur_level:    The level of MMU page table where the ATE needs to be added.
1835  *                The bottom PGD level.
1836  * @insert_level: The level of MMU page table where the chain of newly allocated
1837  *                PGDs needs to be linked-in/inserted.
1838  *                The top-most PDG level to be updated.
1839  * @insert_vpfn:  The virtual page frame number for the ATE.
1840  * @pgds_to_insert: Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) that contains
1841  *                  the physical addresses of newly allocated PGDs from index
1842  *                  insert_level+1 to cur_level, and an existing PGD at index
1843  *                  insert_level.
1844  *
1845  * The newly allocated PGDs are linked from the bottom level up and inserted into the PGD
1846  * at insert_level which already exists in the MMU Page Tables.Migration status is also
1847  * updated for all the newly allocated PGD pages.
1848  *
1849  * Return:
1850  * * 0 - OK
1851  * * -EFAULT - level N+1 PGD does not exist
1852  * * -EINVAL - kmap() failed for level N PGD PFN
1853  */
update_parent_pgds(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,int cur_level,int insert_level,u64 insert_vpfn,phys_addr_t * pgds_to_insert)1854 static int update_parent_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1855 			      int cur_level, int insert_level, u64 insert_vpfn,
1856 			      phys_addr_t *pgds_to_insert)
1857 {
1858 	int pgd_index;
1859 	int err = 0;
1860 
1861 	/* Add a PTE for the new PGD page at pgd_index into the parent PGD at (pgd_index-1)
1862 	 * Loop runs from the bottom-most to the top-most level so that all entries in the chain
1863 	 * are valid when they are inserted into the MMU Page table via the insert_level PGD.
1864 	 */
1865 	for (pgd_index = cur_level; pgd_index > insert_level; pgd_index--) {
1866 		int parent_index = pgd_index - 1;
1867 		phys_addr_t parent_pgd = pgds_to_insert[parent_index];
1868 		unsigned int current_valid_entries;
1869 		u64 pte;
1870 		phys_addr_t target_pgd = pgds_to_insert[pgd_index];
1871 		u64 parent_vpfn = (insert_vpfn >> ((3 - parent_index) * 9)) & 0x1FF;
1872 		struct page *parent_page = pfn_to_page(PFN_DOWN(parent_pgd));
1873 		u64 *parent_page_va;
1874 
1875 		if (WARN_ON_ONCE(target_pgd == KBASE_MMU_INVALID_PGD_ADDRESS)) {
1876 			err = -EFAULT;
1877 			goto failure_recovery;
1878 		}
1879 
1880 		parent_page_va = kmap(parent_page);
1881 		if (unlikely(parent_page_va == NULL)) {
1882 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
1883 			err = -EINVAL;
1884 			goto failure_recovery;
1885 		}
1886 
1887 		current_valid_entries = kbdev->mmu_mode->get_num_valid_entries(parent_page_va);
1888 
1889 		kbdev->mmu_mode->entry_set_pte(&pte, target_pgd);
1890 		parent_page_va[parent_vpfn] = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
1891 			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, parent_index, pte);
1892 		kbdev->mmu_mode->set_num_valid_entries(parent_page_va, current_valid_entries + 1);
1893 		kunmap(parent_page);
1894 
1895 		if (parent_index != insert_level) {
1896 			/* Newly allocated PGDs */
1897 			kbase_mmu_sync_pgd_cpu(
1898 				kbdev, kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
1899 				sizeof(u64));
1900 		} else {
1901 			/* A new valid entry is added to an existing PGD. Perform the
1902 			 * invalidate operation for GPU cache as it could be having a
1903 			 * cacheline that contains the entry (in an invalid form).
1904 			 */
1905 			kbase_mmu_sync_pgd(
1906 				kbdev, mmut->kctx, parent_pgd + (parent_vpfn * sizeof(u64)),
1907 				kbase_dma_addr(parent_page) + (parent_vpfn * sizeof(u64)),
1908 				sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
1909 		}
1910 
1911 		/* Update the new target_pgd page to its stable state */
1912 		if (kbase_page_migration_enabled) {
1913 			struct kbase_page_metadata *page_md =
1914 				kbase_page_private(phys_to_page(target_pgd));
1915 
1916 			spin_lock(&page_md->migrate_lock);
1917 
1918 			WARN_ON_ONCE(PAGE_STATUS_GET(page_md->status) != ALLOCATE_IN_PROGRESS ||
1919 				     IS_PAGE_ISOLATED(page_md->status));
1920 
1921 			if (mmut->kctx) {
1922 				page_md->status = PAGE_STATUS_SET(page_md->status, PT_MAPPED);
1923 				page_md->data.pt_mapped.mmut = mmut;
1924 				page_md->data.pt_mapped.pgd_vpfn_level =
1925 					PGD_VPFN_LEVEL_SET(insert_vpfn, parent_index);
1926 			} else {
1927 				page_md->status = PAGE_STATUS_SET(page_md->status, NOT_MOVABLE);
1928 			}
1929 
1930 			spin_unlock(&page_md->migrate_lock);
1931 		}
1932 	}
1933 
1934 	return 0;
1935 
1936 failure_recovery:
1937 	/* Cleanup PTEs from PGDs. The Parent PGD in the loop above is just "PGD" here */
1938 	for (; pgd_index < cur_level; pgd_index++) {
1939 		phys_addr_t pgd = pgds_to_insert[pgd_index];
1940 		struct page *pgd_page = pfn_to_page(PFN_DOWN(pgd));
1941 		u64 *pgd_page_va = kmap(pgd_page);
1942 		u64 vpfn = (insert_vpfn >> ((3 - pgd_index) * 9)) & 0x1FF;
1943 
1944 		kbdev->mmu_mode->entries_invalidate(&pgd_page_va[vpfn], 1);
1945 		kunmap(pgd_page);
1946 	}
1947 
1948 	return err;
1949 }
1950 
1951 /**
1952  * mmu_insert_alloc_pgds() - allocate memory for PGDs from level_low to
1953  *                           level_high (inclusive)
1954  *
1955  * @kbdev:    Device pointer.
1956  * @mmut:     GPU MMU page table.
1957  * @level_low:  The lower bound for the levels for which the PGD allocs are required
1958  * @level_high: The higher bound for the levels for which the PGD allocs are required
1959  * @new_pgds:   Ptr to an array (size MIDGARD_MMU_BOTTOMLEVEL+1) to write the
1960  *              newly allocated PGD addresses to.
1961  *
1962  * Numerically, level_low < level_high, not to be confused with top level and
1963  * bottom level concepts for MMU PGDs. They are only used as low and high bounds
1964  * in an incrementing for-loop.
1965  *
1966  * Return:
1967  * * 0 - OK
1968  * * -ENOMEM - allocation failed for a PGD.
1969  */
mmu_insert_alloc_pgds(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,phys_addr_t * new_pgds,int level_low,int level_high)1970 static int mmu_insert_alloc_pgds(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
1971 				 phys_addr_t *new_pgds, int level_low, int level_high)
1972 {
1973 	int err = 0;
1974 	int i;
1975 
1976 	lockdep_assert_held(&mmut->mmu_lock);
1977 
1978 	for (i = level_low; i <= level_high; i++) {
1979 		do {
1980 			new_pgds[i] = kbase_mmu_alloc_pgd(kbdev, mmut);
1981 			if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
1982 				break;
1983 
1984 			mutex_unlock(&mmut->mmu_lock);
1985 			err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
1986 						  level_high, NULL);
1987 			mutex_lock(&mmut->mmu_lock);
1988 			if (err) {
1989 				dev_err(kbdev->dev, "%s: kbase_mem_pool_grow() returned error %d",
1990 					__func__, err);
1991 
1992 				/* Free all PGDs allocated in previous successful iterations
1993 				 * from (i-1) to level_low
1994 				 */
1995 				for (i = (i - 1); i >= level_low; i--) {
1996 					if (new_pgds[i] != KBASE_MMU_INVALID_PGD_ADDRESS)
1997 						kbase_mmu_free_pgd(kbdev, mmut, new_pgds[i]);
1998 				}
1999 
2000 				return err;
2001 			}
2002 		} while (1);
2003 	}
2004 
2005 	return 0;
2006 }
2007 
kbase_mmu_insert_single_page(struct kbase_context * kctx,u64 start_vpfn,struct tagged_addr phys,size_t nr,unsigned long flags,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info,bool ignore_page_migration)2008 int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 start_vpfn,
2009 				 struct tagged_addr phys, size_t nr, unsigned long flags,
2010 				 int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
2011 				 bool ignore_page_migration)
2012 {
2013 	phys_addr_t pgd;
2014 	u64 *pgd_page;
2015 	u64 insert_vpfn = start_vpfn;
2016 	size_t remain = nr;
2017 	int err;
2018 	struct kbase_device *kbdev;
2019 	u64 dirty_pgds = 0;
2020 	unsigned int i;
2021 	phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2022 	enum kbase_mmu_op_type flush_op;
2023 	struct kbase_mmu_table *mmut = &kctx->mmu;
2024 	int l, cur_level, insert_level;
2025 
2026 	if (WARN_ON(kctx == NULL))
2027 		return -EINVAL;
2028 
2029 	/* 64-bit address range is the max */
2030 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
2031 
2032 	kbdev = kctx->kbdev;
2033 
2034 	/* Early out if there is nothing to do */
2035 	if (nr == 0)
2036 		return 0;
2037 
2038 	/* If page migration is enabled, pages involved in multiple GPU mappings
2039 	 * are always treated as not movable.
2040 	 */
2041 	if (kbase_page_migration_enabled && !ignore_page_migration) {
2042 		struct page *phys_page = as_page(phys);
2043 		struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2044 
2045 		if (page_md) {
2046 			spin_lock(&page_md->migrate_lock);
2047 			page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
2048 			spin_unlock(&page_md->migrate_lock);
2049 		}
2050 	}
2051 
2052 	mutex_lock(&mmut->mmu_lock);
2053 
2054 	while (remain) {
2055 		unsigned int vindex = insert_vpfn & 0x1FF;
2056 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
2057 		struct page *p;
2058 		register unsigned int num_of_valid_entries;
2059 		bool newly_created_pgd = false;
2060 
2061 		if (count > remain)
2062 			count = remain;
2063 
2064 		cur_level = MIDGARD_MMU_BOTTOMLEVEL;
2065 		insert_level = cur_level;
2066 
2067 		/*
2068 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
2069 		 * suboptimal. We don't have to re-parse the whole tree
2070 		 * each time (just cache the l0-l2 sequence).
2071 		 * On the other hand, it's only a gain when we map more than
2072 		 * 256 pages at once (on average). Do we really care?
2073 		 */
2074 		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */
2075 		err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
2076 					       &pgd);
2077 
2078 		if (err) {
2079 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
2080 				__func__, err);
2081 			goto fail_unlock;
2082 		}
2083 
2084 		/* No valid pgd at cur_level */
2085 		if (insert_level != cur_level) {
2086 			/* Allocate new pgds for all missing levels from the required level
2087 			 * down to the lowest valid pgd at insert_level
2088 			 */
2089 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
2090 						    cur_level);
2091 			if (err)
2092 				goto fail_unlock;
2093 
2094 			newly_created_pgd = true;
2095 
2096 			new_pgds[insert_level] = pgd;
2097 
2098 			/* If we didn't find an existing valid pgd at cur_level,
2099 			 * we've now allocated one. The ATE in the next step should
2100 			 * be inserted in this newly allocated pgd.
2101 			 */
2102 			pgd = new_pgds[cur_level];
2103 		}
2104 
2105 		p = pfn_to_page(PFN_DOWN(pgd));
2106 		pgd_page = kmap(p);
2107 		if (!pgd_page) {
2108 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
2109 			err = -ENOMEM;
2110 
2111 			goto fail_unlock_free_pgds;
2112 		}
2113 
2114 		num_of_valid_entries =
2115 			kbdev->mmu_mode->get_num_valid_entries(pgd_page);
2116 
2117 		for (i = 0; i < count; i++) {
2118 			unsigned int ofs = vindex + i;
2119 
2120 			/* Fail if the current page is a valid ATE entry */
2121 			KBASE_DEBUG_ASSERT(0 == (pgd_page[ofs] & 1UL));
2122 
2123 			pgd_page[ofs] = kbase_mmu_create_ate(kbdev,
2124 				phys, flags, MIDGARD_MMU_BOTTOMLEVEL, group_id);
2125 		}
2126 
2127 		kbdev->mmu_mode->set_num_valid_entries(
2128 			pgd_page, num_of_valid_entries + count);
2129 
2130 		dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : MIDGARD_MMU_BOTTOMLEVEL);
2131 
2132 		/* MMU cache flush operation here will depend on whether bottom level
2133 		 * PGD is newly created or not.
2134 		 *
2135 		 * If bottom level PGD is newly created then no GPU cache maintenance is
2136 		 * required as the PGD will not exist in GPU cache. Otherwise GPU cache
2137 		 * maintenance is required for existing PGD.
2138 		 */
2139 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
2140 
2141 		kbase_mmu_sync_pgd(kbdev, kctx, pgd + (vindex * sizeof(u64)),
2142 				   kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
2143 				   flush_op);
2144 
2145 		if (newly_created_pgd) {
2146 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
2147 						 new_pgds);
2148 			if (err) {
2149 				dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
2150 					__func__, err);
2151 
2152 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
2153 
2154 				kunmap(p);
2155 				goto fail_unlock_free_pgds;
2156 			}
2157 		}
2158 
2159 		insert_vpfn += count;
2160 		remain -= count;
2161 		kunmap(p);
2162 	}
2163 
2164 	mutex_unlock(&mmut->mmu_lock);
2165 
2166 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
2167 					  false);
2168 
2169 	return 0;
2170 
2171 fail_unlock_free_pgds:
2172 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
2173 	for (l = cur_level; l > insert_level; l--)
2174 		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
2175 
2176 fail_unlock:
2177 	if (insert_vpfn != start_vpfn) {
2178 		/* Invalidate the pages we have partially completed */
2179 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, &dirty_pgds,
2180 						  NULL, true);
2181 	}
2182 
2183 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr, dirty_pgds, mmu_sync_info,
2184 					  true);
2185 	kbase_mmu_free_pgds_list(kbdev, mmut);
2186 	mutex_unlock(&mmut->mmu_lock);
2187 
2188 	return err;
2189 }
2190 
kbase_mmu_insert_single_imported_page(struct kbase_context * kctx,u64 vpfn,struct tagged_addr phys,size_t nr,unsigned long flags,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info)2191 int kbase_mmu_insert_single_imported_page(struct kbase_context *kctx, u64 vpfn,
2192 					  struct tagged_addr phys, size_t nr, unsigned long flags,
2193 					  int const group_id,
2194 					  enum kbase_caller_mmu_sync_info mmu_sync_info)
2195 {
2196 	/* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
2197 	return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
2198 					    false);
2199 }
2200 
kbase_mmu_insert_single_aliased_page(struct kbase_context * kctx,u64 vpfn,struct tagged_addr phys,size_t nr,unsigned long flags,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info)2201 int kbase_mmu_insert_single_aliased_page(struct kbase_context *kctx, u64 vpfn,
2202 					 struct tagged_addr phys, size_t nr, unsigned long flags,
2203 					 int const group_id,
2204 					 enum kbase_caller_mmu_sync_info mmu_sync_info)
2205 {
2206 	/* The aliasing sink page has metadata and shall be moved to NOT_MOVABLE. */
2207 	return kbase_mmu_insert_single_page(kctx, vpfn, phys, nr, flags, group_id, mmu_sync_info,
2208 					    false);
2209 }
2210 
kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,struct kbase_va_region * reg,struct kbase_mmu_table * mmut,const u64 vpfn)2211 static void kbase_mmu_progress_migration_on_insert(struct tagged_addr phys,
2212 						   struct kbase_va_region *reg,
2213 						   struct kbase_mmu_table *mmut, const u64 vpfn)
2214 {
2215 	struct page *phys_page = as_page(phys);
2216 	struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2217 
2218 	spin_lock(&page_md->migrate_lock);
2219 
2220 	/* If no GPU va region is given: the metadata provided are
2221 	 * invalid.
2222 	 *
2223 	 * If the page is already allocated and mapped: this is
2224 	 * an additional GPU mapping, probably to create a memory
2225 	 * alias, which means it is no longer possible to migrate
2226 	 * the page easily because tracking all the GPU mappings
2227 	 * would be too costly.
2228 	 *
2229 	 * In any case: the page becomes not movable. It is kept
2230 	 * alive, but attempts to migrate it will fail. The page
2231 	 * will be freed if it is still not movable when it returns
2232 	 * to a memory pool. Notice that the movable flag is not
2233 	 * cleared because that would require taking the page lock.
2234 	 */
2235 	if (!reg || PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATED_MAPPED) {
2236 		page_md->status = PAGE_STATUS_SET(page_md->status, (u8)NOT_MOVABLE);
2237 	} else if (PAGE_STATUS_GET(page_md->status) == (u8)ALLOCATE_IN_PROGRESS) {
2238 		page_md->status = PAGE_STATUS_SET(page_md->status, (u8)ALLOCATED_MAPPED);
2239 		page_md->data.mapped.reg = reg;
2240 		page_md->data.mapped.mmut = mmut;
2241 		page_md->data.mapped.vpfn = vpfn;
2242 	}
2243 
2244 	spin_unlock(&page_md->migrate_lock);
2245 }
2246 
kbase_mmu_progress_migration_on_teardown(struct kbase_device * kbdev,struct tagged_addr * phys,size_t requested_nr)2247 static void kbase_mmu_progress_migration_on_teardown(struct kbase_device *kbdev,
2248 						     struct tagged_addr *phys, size_t requested_nr)
2249 {
2250 	size_t i;
2251 
2252 	for (i = 0; i < requested_nr; i++) {
2253 		struct page *phys_page = as_page(phys[i]);
2254 		struct kbase_page_metadata *page_md = kbase_page_private(phys_page);
2255 
2256 		/* Skip the 4KB page that is part of a large page, as the large page is
2257 		 * excluded from the migration process.
2258 		 */
2259 		if (is_huge(phys[i]) || is_partial(phys[i]))
2260 			continue;
2261 
2262 		if (page_md) {
2263 			u8 status;
2264 
2265 			spin_lock(&page_md->migrate_lock);
2266 			status = PAGE_STATUS_GET(page_md->status);
2267 
2268 			if (status == ALLOCATED_MAPPED) {
2269 				if (IS_PAGE_ISOLATED(page_md->status)) {
2270 					page_md->status = PAGE_STATUS_SET(
2271 						page_md->status, (u8)FREE_ISOLATED_IN_PROGRESS);
2272 					page_md->data.free_isolated.kbdev = kbdev;
2273 					/* At this point, we still have a reference
2274 					 * to the page via its page migration metadata,
2275 					 * and any page with the FREE_ISOLATED_IN_PROGRESS
2276 					 * status will subsequently be freed in either
2277 					 * kbase_page_migrate() or kbase_page_putback()
2278 					 */
2279 					phys[i] = as_tagged(0);
2280 				} else
2281 					page_md->status = PAGE_STATUS_SET(page_md->status,
2282 									  (u8)FREE_IN_PROGRESS);
2283 			}
2284 
2285 			spin_unlock(&page_md->migrate_lock);
2286 		}
2287 	}
2288 }
2289 
kbase_mmu_create_ate(struct kbase_device * const kbdev,struct tagged_addr const phy,unsigned long const flags,int const level,int const group_id)2290 u64 kbase_mmu_create_ate(struct kbase_device *const kbdev,
2291 	struct tagged_addr const phy, unsigned long const flags,
2292 	int const level, int const group_id)
2293 {
2294 	u64 entry;
2295 
2296 	kbdev->mmu_mode->entry_set_ate(&entry, phy, flags, level);
2297 	return kbdev->mgm_dev->ops.mgm_update_gpu_pte(kbdev->mgm_dev,
2298 		group_id, level, entry);
2299 }
2300 
kbase_mmu_insert_pages_no_flush(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,const u64 start_vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int const group_id,u64 * dirty_pgds,struct kbase_va_region * reg,bool ignore_page_migration)2301 int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2302 				    const u64 start_vpfn, struct tagged_addr *phys, size_t nr,
2303 				    unsigned long flags, int const group_id, u64 *dirty_pgds,
2304 				    struct kbase_va_region *reg, bool ignore_page_migration)
2305 {
2306 	phys_addr_t pgd;
2307 	u64 *pgd_page;
2308 	u64 insert_vpfn = start_vpfn;
2309 	size_t remain = nr;
2310 	int err;
2311 	struct kbase_mmu_mode const *mmu_mode;
2312 	unsigned int i;
2313 	phys_addr_t new_pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2314 	int l, cur_level, insert_level;
2315 
2316 	/* Note that 0 is a valid start_vpfn */
2317 	/* 64-bit address range is the max */
2318 	KBASE_DEBUG_ASSERT(start_vpfn <= (U64_MAX / PAGE_SIZE));
2319 
2320 	mmu_mode = kbdev->mmu_mode;
2321 
2322 	/* Early out if there is nothing to do */
2323 	if (nr == 0)
2324 		return 0;
2325 
2326 	mutex_lock(&mmut->mmu_lock);
2327 
2328 	while (remain) {
2329 		unsigned int vindex = insert_vpfn & 0x1FF;
2330 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - vindex;
2331 		struct page *p;
2332 		register unsigned int num_of_valid_entries;
2333 		bool newly_created_pgd = false;
2334 		enum kbase_mmu_op_type flush_op;
2335 
2336 		if (count > remain)
2337 			count = remain;
2338 
2339 		if (!vindex && is_huge_head(*phys))
2340 			cur_level = MIDGARD_MMU_LEVEL(2);
2341 		else
2342 			cur_level = MIDGARD_MMU_BOTTOMLEVEL;
2343 
2344 		insert_level = cur_level;
2345 
2346 		/*
2347 		 * Repeatedly calling mmu_get_lowest_valid_pgd() is clearly
2348 		 * suboptimal. We don't have to re-parse the whole tree
2349 		 * each time (just cache the l0-l2 sequence).
2350 		 * On the other hand, it's only a gain when we map more than
2351 		 * 256 pages at once (on average). Do we really care?
2352 		 */
2353 		/* insert_level < cur_level if there's no valid PGD for cur_level and insert_vpn */
2354 		err = mmu_get_lowest_valid_pgd(kbdev, mmut, insert_vpfn, cur_level, &insert_level,
2355 					       &pgd);
2356 
2357 		if (err) {
2358 			dev_err(kbdev->dev, "%s: mmu_get_lowest_valid_pgd() returned error %d",
2359 				__func__, err);
2360 			goto fail_unlock;
2361 		}
2362 
2363 		/* No valid pgd at cur_level */
2364 		if (insert_level != cur_level) {
2365 			/* Allocate new pgds for all missing levels from the required level
2366 			 * down to the lowest valid pgd at insert_level
2367 			 */
2368 			err = mmu_insert_alloc_pgds(kbdev, mmut, new_pgds, (insert_level + 1),
2369 						    cur_level);
2370 			if (err)
2371 				goto fail_unlock;
2372 
2373 			newly_created_pgd = true;
2374 
2375 			new_pgds[insert_level] = pgd;
2376 
2377 			/* If we didn't find an existing valid pgd at cur_level,
2378 			 * we've now allocated one. The ATE in the next step should
2379 			 * be inserted in this newly allocated pgd.
2380 			 */
2381 			pgd = new_pgds[cur_level];
2382 		}
2383 
2384 		p = pfn_to_page(PFN_DOWN(pgd));
2385 		pgd_page = kmap(p);
2386 		if (!pgd_page) {
2387 			dev_err(kbdev->dev, "%s: kmap failure", __func__);
2388 			err = -ENOMEM;
2389 
2390 			goto fail_unlock_free_pgds;
2391 		}
2392 
2393 		num_of_valid_entries =
2394 			mmu_mode->get_num_valid_entries(pgd_page);
2395 
2396 		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
2397 			int level_index = (insert_vpfn >> 9) & 0x1FF;
2398 			pgd_page[level_index] =
2399 				kbase_mmu_create_ate(kbdev, *phys, flags, cur_level, group_id);
2400 
2401 			num_of_valid_entries++;
2402 		} else {
2403 			for (i = 0; i < count; i++) {
2404 				unsigned int ofs = vindex + i;
2405 				u64 *target = &pgd_page[ofs];
2406 
2407 				/* Warn if the current page is a valid ATE
2408 				 * entry. The page table shouldn't have anything
2409 				 * in the place where we are trying to put a
2410 				 * new entry. Modification to page table entries
2411 				 * should be performed with
2412 				 * kbase_mmu_update_pages()
2413 				 */
2414 				WARN_ON((*target & 1UL) != 0);
2415 
2416 				*target = kbase_mmu_create_ate(kbdev,
2417 					phys[i], flags, cur_level, group_id);
2418 
2419 				/* If page migration is enabled, this is the right time
2420 				 * to update the status of the page.
2421 				 */
2422 				if (kbase_page_migration_enabled && !ignore_page_migration &&
2423 				    !is_huge(phys[i]) && !is_partial(phys[i]))
2424 					kbase_mmu_progress_migration_on_insert(phys[i], reg, mmut,
2425 									       insert_vpfn + i);
2426 			}
2427 			num_of_valid_entries += count;
2428 		}
2429 
2430 		mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
2431 
2432 		if (dirty_pgds)
2433 			*dirty_pgds |= 1ULL << (newly_created_pgd ? insert_level : cur_level);
2434 
2435 		/* MMU cache flush operation here will depend on whether bottom level
2436 		 * PGD is newly created or not.
2437 		 *
2438 		 * If bottom level PGD is newly created then no GPU cache maintenance is
2439 		 * required as the PGD will not exist in GPU cache. Otherwise GPU cache
2440 		 * maintenance is required for existing PGD.
2441 		 */
2442 		flush_op = newly_created_pgd ? KBASE_MMU_OP_NONE : KBASE_MMU_OP_FLUSH_PT;
2443 
2444 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (vindex * sizeof(u64)),
2445 				   kbase_dma_addr(p) + (vindex * sizeof(u64)), count * sizeof(u64),
2446 				   flush_op);
2447 
2448 		if (newly_created_pgd) {
2449 			err = update_parent_pgds(kbdev, mmut, cur_level, insert_level, insert_vpfn,
2450 						 new_pgds);
2451 			if (err) {
2452 				dev_err(kbdev->dev, "%s: update_parent_pgds() failed (%d)",
2453 					__func__, err);
2454 
2455 				kbdev->mmu_mode->entries_invalidate(&pgd_page[vindex], count);
2456 
2457 				kunmap(p);
2458 				goto fail_unlock_free_pgds;
2459 			}
2460 		}
2461 
2462 		phys += count;
2463 		insert_vpfn += count;
2464 		remain -= count;
2465 		kunmap(p);
2466 	}
2467 
2468 	mutex_unlock(&mmut->mmu_lock);
2469 
2470 	return 0;
2471 
2472 fail_unlock_free_pgds:
2473 	/* Free the pgds allocated by us from insert_level+1 to bottom level */
2474 	for (l = cur_level; l > insert_level; l--)
2475 		kbase_mmu_free_pgd(kbdev, mmut, new_pgds[l]);
2476 
2477 fail_unlock:
2478 	if (insert_vpfn != start_vpfn) {
2479 		/* Invalidate the pages we have partially completed */
2480 		mmu_insert_pages_failure_recovery(kbdev, mmut, start_vpfn, insert_vpfn, dirty_pgds,
2481 						  phys, ignore_page_migration);
2482 	}
2483 
2484 	mmu_flush_invalidate_insert_pages(kbdev, mmut, start_vpfn, nr,
2485 					  dirty_pgds ? *dirty_pgds : 0xF, CALLER_MMU_ASYNC, true);
2486 	kbase_mmu_free_pgds_list(kbdev, mmut);
2487 	mutex_unlock(&mmut->mmu_lock);
2488 
2489 	return err;
2490 }
2491 
2492 /*
2493  * Map 'nr' pages pointed to by 'phys' at GPU PFN 'vpfn' for GPU address space
2494  * number 'as_nr'.
2495  */
kbase_mmu_insert_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int as_nr,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info,struct kbase_va_region * reg,bool ignore_page_migration)2496 int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
2497 			   struct tagged_addr *phys, size_t nr, unsigned long flags, int as_nr,
2498 			   int const group_id, enum kbase_caller_mmu_sync_info mmu_sync_info,
2499 			   struct kbase_va_region *reg, bool ignore_page_migration)
2500 {
2501 	int err;
2502 	u64 dirty_pgds = 0;
2503 
2504 	/* Early out if there is nothing to do */
2505 	if (nr == 0)
2506 		return 0;
2507 
2508 	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2509 					      &dirty_pgds, reg, ignore_page_migration);
2510 	if (err)
2511 		return err;
2512 
2513 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
2514 
2515 	return 0;
2516 }
2517 
2518 KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
2519 
kbase_mmu_insert_imported_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int as_nr,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info,struct kbase_va_region * reg)2520 int kbase_mmu_insert_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2521 				    u64 vpfn, struct tagged_addr *phys, size_t nr,
2522 				    unsigned long flags, int as_nr, int const group_id,
2523 				    enum kbase_caller_mmu_sync_info mmu_sync_info,
2524 				    struct kbase_va_region *reg)
2525 {
2526 	int err;
2527 	u64 dirty_pgds = 0;
2528 
2529 	/* Early out if there is nothing to do */
2530 	if (nr == 0)
2531 		return 0;
2532 
2533 	/* Imported allocations don't have metadata and therefore always ignore the
2534 	 * page migration logic.
2535 	 */
2536 	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2537 					      &dirty_pgds, reg, true);
2538 	if (err)
2539 		return err;
2540 
2541 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
2542 
2543 	return 0;
2544 }
2545 
kbase_mmu_insert_aliased_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int as_nr,int const group_id,enum kbase_caller_mmu_sync_info mmu_sync_info,struct kbase_va_region * reg)2546 int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2547 				   u64 vpfn, struct tagged_addr *phys, size_t nr,
2548 				   unsigned long flags, int as_nr, int const group_id,
2549 				   enum kbase_caller_mmu_sync_info mmu_sync_info,
2550 				   struct kbase_va_region *reg)
2551 {
2552 	int err;
2553 	u64 dirty_pgds = 0;
2554 
2555 	/* Early out if there is nothing to do */
2556 	if (nr == 0)
2557 		return 0;
2558 
2559 	/* Memory aliases are always built on top of existing allocations,
2560 	 * therefore the state of physical pages shall be updated.
2561 	 */
2562 	err = kbase_mmu_insert_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
2563 					      &dirty_pgds, reg, false);
2564 	if (err)
2565 		return err;
2566 
2567 	mmu_flush_invalidate_insert_pages(kbdev, mmut, vpfn, nr, dirty_pgds, mmu_sync_info, false);
2568 
2569 	return 0;
2570 }
2571 
kbase_mmu_update(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,int as_nr)2572 void kbase_mmu_update(struct kbase_device *kbdev,
2573 		struct kbase_mmu_table *mmut,
2574 		int as_nr)
2575 {
2576 	lockdep_assert_held(&kbdev->hwaccess_lock);
2577 	lockdep_assert_held(&kbdev->mmu_hw_mutex);
2578 	KBASE_DEBUG_ASSERT(as_nr != KBASEP_AS_NR_INVALID);
2579 
2580 	kbdev->mmu_mode->update(kbdev, mmut, as_nr);
2581 }
2582 KBASE_EXPORT_TEST_API(kbase_mmu_update);
2583 
kbase_mmu_disable_as(struct kbase_device * kbdev,int as_nr)2584 void kbase_mmu_disable_as(struct kbase_device *kbdev, int as_nr)
2585 {
2586 	lockdep_assert_held(&kbdev->hwaccess_lock);
2587 	lockdep_assert_held(&kbdev->mmu_hw_mutex);
2588 
2589 	kbdev->mmu_mode->disable_as(kbdev, as_nr);
2590 }
2591 
kbase_mmu_disable(struct kbase_context * kctx)2592 void kbase_mmu_disable(struct kbase_context *kctx)
2593 {
2594 	/* Calls to this function are inherently asynchronous, with respect to
2595 	 * MMU operations.
2596 	 */
2597 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
2598 	struct kbase_device *kbdev = kctx->kbdev;
2599 	struct kbase_mmu_hw_op_param op_param = { 0 };
2600 	int lock_err, flush_err;
2601 
2602 	/* ASSERT that the context has a valid as_nr, which is only the case
2603 	 * when it's scheduled in.
2604 	 *
2605 	 * as_nr won't change because the caller has the hwaccess_lock
2606 	 */
2607 	KBASE_DEBUG_ASSERT(kctx->as_nr != KBASEP_AS_NR_INVALID);
2608 
2609 	lockdep_assert_held(&kctx->kbdev->hwaccess_lock);
2610 	lockdep_assert_held(&kctx->kbdev->mmu_hw_mutex);
2611 
2612 	op_param.vpfn = 0;
2613 	op_param.nr = ~0;
2614 	op_param.op = KBASE_MMU_OP_FLUSH_MEM;
2615 	op_param.kctx_id = kctx->id;
2616 	op_param.mmu_sync_info = mmu_sync_info;
2617 
2618 #if MALI_USE_CSF
2619 	/* 0xF value used to prevent skipping of any levels when flushing */
2620 	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
2621 		op_param.flush_skip_levels = pgd_level_to_skip_flush(0xF);
2622 #endif
2623 
2624 	/* lock MMU to prevent existing jobs on GPU from executing while the AS is
2625 	 * not yet disabled
2626 	 */
2627 	lock_err = kbase_mmu_hw_do_lock(kbdev, &kbdev->as[kctx->as_nr], &op_param);
2628 	if (lock_err)
2629 		dev_err(kbdev->dev, "Failed to lock AS %d for ctx %d_%d", kctx->as_nr, kctx->tgid,
2630 			kctx->id);
2631 
2632 	/* Issue the flush command only when L2 cache is in stable power on state.
2633 	 * Any other state for L2 cache implies that shader cores are powered off,
2634 	 * which in turn implies there is no execution happening on the GPU.
2635 	 */
2636 	if (kbdev->pm.backend.l2_state == KBASE_L2_ON) {
2637 		flush_err = kbase_gpu_cache_flush_and_busy_wait(kbdev,
2638 								GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
2639 		if (flush_err)
2640 			dev_err(kbdev->dev,
2641 				"Failed to flush GPU cache when disabling AS %d for ctx %d_%d",
2642 				kctx->as_nr, kctx->tgid, kctx->id);
2643 	}
2644 	kbdev->mmu_mode->disable_as(kbdev, kctx->as_nr);
2645 
2646 	if (!lock_err) {
2647 		/* unlock the MMU to allow it to resume */
2648 		lock_err =
2649 			kbase_mmu_hw_do_unlock_no_addr(kbdev, &kbdev->as[kctx->as_nr], &op_param);
2650 		if (lock_err)
2651 			dev_err(kbdev->dev, "Failed to unlock AS %d for ctx %d_%d", kctx->as_nr,
2652 				kctx->tgid, kctx->id);
2653 	}
2654 
2655 #if !MALI_USE_CSF
2656 	/*
2657 	 * JM GPUs has some L1 read only caches that need to be invalidated
2658 	 * with START_FLUSH configuration. Purge the MMU disabled kctx from
2659 	 * the slot_rb tracking field so such invalidation is performed when
2660 	 * a new katom is executed on the affected slots.
2661 	 */
2662 	kbase_backend_slot_kctx_purge_locked(kbdev, kctx);
2663 #endif
2664 }
2665 KBASE_EXPORT_TEST_API(kbase_mmu_disable);
2666 
kbase_mmu_update_and_free_parent_pgds(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,phys_addr_t * pgds,u64 vpfn,int level,enum kbase_mmu_op_type flush_op,u64 * dirty_pgds)2667 static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
2668 						  struct kbase_mmu_table *mmut, phys_addr_t *pgds,
2669 						  u64 vpfn, int level,
2670 						  enum kbase_mmu_op_type flush_op, u64 *dirty_pgds)
2671 {
2672 	int current_level;
2673 
2674 	lockdep_assert_held(&mmut->mmu_lock);
2675 
2676 	for (current_level = level - 1; current_level >= MIDGARD_MMU_LEVEL(0);
2677 	     current_level--) {
2678 		phys_addr_t current_pgd = pgds[current_level];
2679 		struct page *p = phys_to_page(current_pgd);
2680 		u64 *current_page = kmap(p);
2681 		unsigned int current_valid_entries =
2682 			kbdev->mmu_mode->get_num_valid_entries(current_page);
2683 		int index = (vpfn >> ((3 - current_level) * 9)) & 0x1FF;
2684 
2685 		/* We need to track every level that needs updating */
2686 		if (dirty_pgds)
2687 			*dirty_pgds |= 1ULL << current_level;
2688 
2689 		kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
2690 		if (current_valid_entries == 1 &&
2691 		    current_level != MIDGARD_MMU_LEVEL(0)) {
2692 			kunmap(p);
2693 
2694 			/* Ensure the cacheline containing the last valid entry
2695 			 * of PGD is invalidated from the GPU cache, before the
2696 			 * PGD page is freed.
2697 			 */
2698 			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
2699 				current_pgd + (index * sizeof(u64)),
2700 				sizeof(u64), flush_op);
2701 
2702 			kbase_mmu_add_to_free_pgds_list(mmut, p);
2703 		} else {
2704 			current_valid_entries--;
2705 
2706 			kbdev->mmu_mode->set_num_valid_entries(
2707 				current_page, current_valid_entries);
2708 
2709 			kunmap(p);
2710 
2711 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, current_pgd + (index * sizeof(u64)),
2712 					   kbase_dma_addr(p) + (index * sizeof(u64)), sizeof(u64),
2713 					   flush_op);
2714 			break;
2715 		}
2716 	}
2717 }
2718 
2719 /**
2720  * mmu_flush_invalidate_teardown_pages() - Perform flush operation after unmapping pages.
2721  *
2722  * @kbdev:         Pointer to kbase device.
2723  * @kctx:          Pointer to kbase context.
2724  * @as_nr:         Address space number, for GPU cache maintenance operations
2725  *                 that happen outside a specific kbase context.
2726  * @phys:          Array of physical pages to flush.
2727  * @phys_page_nr:  Number of physical pages to flush.
2728  * @op_param:      Non-NULL pointer to struct containing information about the flush
2729  *                 operation to perform.
2730  *
2731  * This function will do one of three things:
2732  * 1. Invalidate the MMU caches, followed by a partial GPU cache flush of the
2733  *    individual pages that were unmapped if feature is supported on GPU.
2734  * 2. Perform a full GPU cache flush through the GPU_CONTROL interface if feature is
2735  *    supported on GPU or,
2736  * 3. Perform a full GPU cache flush through the MMU_CONTROL interface.
2737  *
2738  * When performing a partial GPU cache flush, the number of physical
2739  * pages does not have to be identical to the number of virtual pages on the MMU,
2740  * to support a single physical address flush for an aliased page.
2741  */
mmu_flush_invalidate_teardown_pages(struct kbase_device * kbdev,struct kbase_context * kctx,int as_nr,struct tagged_addr * phys,size_t phys_page_nr,struct kbase_mmu_hw_op_param * op_param)2742 static void mmu_flush_invalidate_teardown_pages(struct kbase_device *kbdev,
2743 						struct kbase_context *kctx, int as_nr,
2744 						struct tagged_addr *phys, size_t phys_page_nr,
2745 						struct kbase_mmu_hw_op_param *op_param)
2746 {
2747 	if (!mmu_flush_cache_on_gpu_ctrl(kbdev)) {
2748 		/* Full cache flush through the MMU_COMMAND */
2749 		mmu_flush_invalidate(kbdev, kctx, as_nr, op_param);
2750 	} else if (op_param->op == KBASE_MMU_OP_FLUSH_MEM) {
2751 		/* Full cache flush through the GPU_CONTROL */
2752 		mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, op_param);
2753 	}
2754 #if MALI_USE_CSF
2755 	else {
2756 		/* Partial GPU cache flush with MMU cache invalidation */
2757 		unsigned long irq_flags;
2758 		unsigned int i;
2759 		bool flush_done = false;
2760 
2761 		mmu_invalidate(kbdev, kctx, as_nr, op_param);
2762 
2763 		for (i = 0; !flush_done && i < phys_page_nr; i++) {
2764 			spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
2765 			if (kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
2766 				mmu_flush_pa_range(kbdev, as_phys_addr_t(phys[i]), PAGE_SIZE,
2767 						   KBASE_MMU_OP_FLUSH_MEM);
2768 			else
2769 				flush_done = true;
2770 			spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
2771 		}
2772 	}
2773 #endif
2774 }
2775 
kbase_mmu_teardown_pgd_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,size_t nr,u64 * dirty_pgds,struct list_head * free_pgds_list,enum kbase_mmu_op_type flush_op)2776 static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
2777 					u64 vpfn, size_t nr, u64 *dirty_pgds,
2778 					struct list_head *free_pgds_list,
2779 					enum kbase_mmu_op_type flush_op)
2780 {
2781 	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
2782 
2783 	lockdep_assert_held(&mmut->mmu_lock);
2784 	kbase_mmu_reset_free_pgds_list(mmut);
2785 
2786 	while (nr) {
2787 		unsigned int index = vpfn & 0x1FF;
2788 		unsigned int count = KBASE_MMU_PAGE_ENTRIES - index;
2789 		unsigned int pcount;
2790 		int level;
2791 		u64 *page;
2792 		phys_addr_t pgds[MIDGARD_MMU_BOTTOMLEVEL + 1];
2793 		register unsigned int num_of_valid_entries;
2794 		phys_addr_t pgd = mmut->pgd;
2795 		struct page *p = phys_to_page(pgd);
2796 
2797 		if (count > nr)
2798 			count = nr;
2799 
2800 		/* need to check if this is a 2MB page or a 4kB */
2801 		for (level = MIDGARD_MMU_TOPLEVEL;
2802 				level <= MIDGARD_MMU_BOTTOMLEVEL; level++) {
2803 			phys_addr_t next_pgd;
2804 
2805 			index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
2806 			page = kmap(p);
2807 			if (mmu_mode->ate_is_valid(page[index], level))
2808 				break; /* keep the mapping */
2809 			else if (!mmu_mode->pte_is_valid(page[index], level)) {
2810 				/* nothing here, advance */
2811 				switch (level) {
2812 				case MIDGARD_MMU_LEVEL(0):
2813 					count = 134217728;
2814 					break;
2815 				case MIDGARD_MMU_LEVEL(1):
2816 					count = 262144;
2817 					break;
2818 				case MIDGARD_MMU_LEVEL(2):
2819 					count = 512;
2820 					break;
2821 				case MIDGARD_MMU_LEVEL(3):
2822 					count = 1;
2823 					break;
2824 				}
2825 				if (count > nr)
2826 					count = nr;
2827 				goto next;
2828 			}
2829 			next_pgd = mmu_mode->pte_to_phy_addr(
2830 				kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
2831 					kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, page[index]));
2832 			kunmap(p);
2833 			pgds[level] = pgd;
2834 			pgd = next_pgd;
2835 			p = phys_to_page(pgd);
2836 		}
2837 
2838 		switch (level) {
2839 		case MIDGARD_MMU_LEVEL(0):
2840 		case MIDGARD_MMU_LEVEL(1):
2841 			dev_warn(kbdev->dev, "%s: No support for ATEs at level %d", __func__,
2842 				 level);
2843 			kunmap(p);
2844 			goto out;
2845 		case MIDGARD_MMU_LEVEL(2):
2846 			/* can only teardown if count >= 512 */
2847 			if (count >= 512) {
2848 				pcount = 1;
2849 			} else {
2850 				dev_warn(
2851 					kbdev->dev,
2852 					"%s: limiting teardown as it tries to do a partial 2MB teardown, need 512, but have %d to tear down",
2853 					__func__, count);
2854 				pcount = 0;
2855 			}
2856 			break;
2857 		case MIDGARD_MMU_BOTTOMLEVEL:
2858 			/* page count is the same as the logical count */
2859 			pcount = count;
2860 			break;
2861 		default:
2862 			dev_err(kbdev->dev, "%s: found non-mapped memory, early out", __func__);
2863 			vpfn += count;
2864 			nr -= count;
2865 			continue;
2866 		}
2867 
2868 		if (pcount > 0)
2869 			*dirty_pgds |= 1ULL << level;
2870 
2871 		num_of_valid_entries = mmu_mode->get_num_valid_entries(page);
2872 		if (WARN_ON_ONCE(num_of_valid_entries < pcount))
2873 			num_of_valid_entries = 0;
2874 		else
2875 			num_of_valid_entries -= pcount;
2876 
2877 		/* Invalidate the entries we added */
2878 		mmu_mode->entries_invalidate(&page[index], pcount);
2879 
2880 		if (!num_of_valid_entries) {
2881 			kunmap(p);
2882 
2883 			/* Ensure the cacheline(s) containing the last valid entries
2884 			 * of PGD is invalidated from the GPU cache, before the
2885 			 * PGD page is freed.
2886 			 */
2887 			kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
2888 				pgd + (index * sizeof(u64)),
2889 				pcount * sizeof(u64), flush_op);
2890 
2891 			kbase_mmu_add_to_free_pgds_list(mmut, p);
2892 
2893 			kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
2894 							      flush_op, dirty_pgds);
2895 
2896 			vpfn += count;
2897 			nr -= count;
2898 			continue;
2899 		}
2900 
2901 		mmu_mode->set_num_valid_entries(page, num_of_valid_entries);
2902 
2903 		kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
2904 				   kbase_dma_addr(p) + (index * sizeof(u64)), pcount * sizeof(u64),
2905 				   flush_op);
2906 next:
2907 		kunmap(p);
2908 		vpfn += count;
2909 		nr -= count;
2910 	}
2911 out:
2912 	return 0;
2913 }
2914 
kbase_mmu_teardown_pages(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,struct tagged_addr * phys,size_t nr_phys_pages,size_t nr_virt_pages,int as_nr,bool ignore_page_migration)2915 int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut, u64 vpfn,
2916 			     struct tagged_addr *phys, size_t nr_phys_pages, size_t nr_virt_pages,
2917 			     int as_nr, bool ignore_page_migration)
2918 {
2919 	u64 start_vpfn = vpfn;
2920 	enum kbase_mmu_op_type flush_op = KBASE_MMU_OP_NONE;
2921 	struct kbase_mmu_hw_op_param op_param;
2922 	int err = -EFAULT;
2923 	u64 dirty_pgds = 0;
2924 	LIST_HEAD(free_pgds_list);
2925 
2926 	/* Calls to this function are inherently asynchronous, with respect to
2927 	 * MMU operations.
2928 	 */
2929 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
2930 
2931 	/* This function performs two operations: MMU maintenance and flushing
2932 	 * the caches. To ensure internal consistency between the caches and the
2933 	 * MMU, it does not make sense to be able to flush only the physical pages
2934 	 * from the cache and keep the PTE, nor does it make sense to use this
2935 	 * function to remove a PTE and keep the physical pages in the cache.
2936 	 *
2937 	 * However, we have legitimate cases where we can try to tear down a mapping
2938 	 * with zero virtual and zero physical pages, so we must have the following
2939 	 * behaviour:
2940 	 *  - if both physical and virtual page counts are zero, return early
2941 	 *  - if either physical and virtual page counts are zero, return early
2942 	 *  - if there are fewer physical pages than virtual pages, return -EINVAL
2943 	 */
2944 	if (unlikely(nr_virt_pages == 0 || nr_phys_pages == 0))
2945 		return 0;
2946 
2947 	if (unlikely(nr_virt_pages < nr_phys_pages))
2948 		return -EINVAL;
2949 
2950 	/* MMU cache flush strategy depends on the number of pages to unmap. In both cases
2951 	 * the operation is invalidate but the granularity of cache maintenance may change
2952 	 * according to the situation.
2953 	 *
2954 	 * If GPU control command operations are present and the number of pages is "small",
2955 	 * then the optimal strategy is flushing on the physical address range of the pages
2956 	 * which are affected by the operation. That implies both the PGDs which are modified
2957 	 * or removed from the page table and the physical pages which are freed from memory.
2958 	 *
2959 	 * Otherwise, there's no alternative to invalidating the whole GPU cache.
2960 	 */
2961 	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && phys &&
2962 	    nr_phys_pages <= KBASE_PA_RANGE_THRESHOLD_NR_PAGES)
2963 		flush_op = KBASE_MMU_OP_FLUSH_PT;
2964 
2965 	mutex_lock(&mmut->mmu_lock);
2966 
2967 	err = kbase_mmu_teardown_pgd_pages(kbdev, mmut, vpfn, nr_virt_pages, &dirty_pgds,
2968 					   &free_pgds_list, flush_op);
2969 
2970 	/* Set up MMU operation parameters. See above about MMU cache flush strategy. */
2971 	op_param = (struct kbase_mmu_hw_op_param){
2972 		.vpfn = start_vpfn,
2973 		.nr = nr_virt_pages,
2974 		.mmu_sync_info = mmu_sync_info,
2975 		.kctx_id = mmut->kctx ? mmut->kctx->id : 0xFFFFFFFF,
2976 		.op = (flush_op == KBASE_MMU_OP_FLUSH_PT) ? KBASE_MMU_OP_FLUSH_PT :
2977 							    KBASE_MMU_OP_FLUSH_MEM,
2978 		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
2979 	};
2980 	mmu_flush_invalidate_teardown_pages(kbdev, mmut->kctx, as_nr, phys, nr_phys_pages,
2981 					    &op_param);
2982 
2983 	/* If page migration is enabled: the status of all physical pages involved
2984 	 * shall be updated, unless they are not movable. Their status shall be
2985 	 * updated before releasing the lock to protect against concurrent
2986 	 * requests to migrate the pages, if they have been isolated.
2987 	 */
2988 	if (kbase_page_migration_enabled && phys && !ignore_page_migration)
2989 		kbase_mmu_progress_migration_on_teardown(kbdev, phys, nr_phys_pages);
2990 
2991 	kbase_mmu_free_pgds_list(kbdev, mmut);
2992 
2993 	mutex_unlock(&mmut->mmu_lock);
2994 
2995 	return err;
2996 }
2997 KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
2998 
2999 /**
3000  * kbase_mmu_update_pages_no_flush() - Update phy pages and attributes data in GPU
3001  *                                     page table entries
3002  *
3003  * @kbdev: Pointer to kbase device.
3004  * @mmut:  The involved MMU table
3005  * @vpfn:  Virtual PFN (Page Frame Number) of the first page to update
3006  * @phys:  Pointer to the array of tagged physical addresses of the physical
3007  *         pages that are pointed to by the page table entries (that need to
3008  *         be updated). The pointer should be within the reg->gpu_alloc->pages
3009  *         array.
3010  * @nr:    Number of pages to update
3011  * @flags: Flags
3012  * @group_id: The physical memory group in which the page was allocated.
3013  *            Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
3014  * @dirty_pgds: Flags to track every level where a PGD has been updated.
3015  *
3016  * This will update page table entries that already exist on the GPU based on
3017  * new flags and replace any existing phy pages that are passed (the PGD pages
3018  * remain unchanged). It is used as a response to the changes of phys as well
3019  * as the the memory attributes.
3020  *
3021  * The caller is responsible for validating the memory attributes.
3022  *
3023  * Return: 0 if the attributes data in page table entries were updated
3024  *         successfully, otherwise an error code.
3025  */
kbase_mmu_update_pages_no_flush(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int const group_id,u64 * dirty_pgds)3026 static int kbase_mmu_update_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
3027 					   u64 vpfn, struct tagged_addr *phys, size_t nr,
3028 					   unsigned long flags, int const group_id, u64 *dirty_pgds)
3029 {
3030 	phys_addr_t pgd;
3031 	u64 *pgd_page;
3032 	int err;
3033 
3034 	KBASE_DEBUG_ASSERT(vpfn <= (U64_MAX / PAGE_SIZE));
3035 
3036 	/* Early out if there is nothing to do */
3037 	if (nr == 0)
3038 		return 0;
3039 
3040 	mutex_lock(&mmut->mmu_lock);
3041 
3042 	while (nr) {
3043 		unsigned int i;
3044 		unsigned int index = vpfn & 0x1FF;
3045 		size_t count = KBASE_MMU_PAGE_ENTRIES - index;
3046 		struct page *p;
3047 		register unsigned int num_of_valid_entries;
3048 		int cur_level = MIDGARD_MMU_BOTTOMLEVEL;
3049 
3050 		if (count > nr)
3051 			count = nr;
3052 
3053 		if (is_huge(*phys) && (index == index_in_large_page(*phys)))
3054 			cur_level = MIDGARD_MMU_LEVEL(2);
3055 
3056 		err = mmu_get_pgd_at_level(kbdev, mmut, vpfn, cur_level, &pgd);
3057 		if (WARN_ON(err))
3058 			goto fail_unlock;
3059 
3060 		p = pfn_to_page(PFN_DOWN(pgd));
3061 		pgd_page = kmap(p);
3062 		if (!pgd_page) {
3063 			dev_warn(kbdev->dev, "kmap failure on update_pages");
3064 			err = -ENOMEM;
3065 			goto fail_unlock;
3066 		}
3067 
3068 		num_of_valid_entries =
3069 			kbdev->mmu_mode->get_num_valid_entries(pgd_page);
3070 
3071 		if (cur_level == MIDGARD_MMU_LEVEL(2)) {
3072 			int level_index = (vpfn >> 9) & 0x1FF;
3073 			struct tagged_addr *target_phys =
3074 				phys - index_in_large_page(*phys);
3075 
3076 #ifdef CONFIG_MALI_BIFROST_DEBUG
3077 			WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
3078 					pgd_page[level_index], MIDGARD_MMU_LEVEL(2)));
3079 #endif
3080 			pgd_page[level_index] = kbase_mmu_create_ate(kbdev,
3081 					*target_phys, flags, MIDGARD_MMU_LEVEL(2),
3082 					group_id);
3083 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (level_index * sizeof(u64)),
3084 					   kbase_dma_addr(p) + (level_index * sizeof(u64)),
3085 					   sizeof(u64), KBASE_MMU_OP_NONE);
3086 		} else {
3087 			for (i = 0; i < count; i++) {
3088 #ifdef CONFIG_MALI_BIFROST_DEBUG
3089 				WARN_ON_ONCE(!kbdev->mmu_mode->ate_is_valid(
3090 						pgd_page[index + i],
3091 						MIDGARD_MMU_BOTTOMLEVEL));
3092 #endif
3093 				pgd_page[index + i] = kbase_mmu_create_ate(kbdev,
3094 					phys[i], flags, MIDGARD_MMU_BOTTOMLEVEL,
3095 					group_id);
3096 			}
3097 
3098 			/* MMU cache flush strategy is NONE because GPU cache maintenance
3099 			 * will be done by the caller.
3100 			 */
3101 			kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
3102 					   kbase_dma_addr(p) + (index * sizeof(u64)),
3103 					   count * sizeof(u64), KBASE_MMU_OP_NONE);
3104 		}
3105 
3106 		kbdev->mmu_mode->set_num_valid_entries(pgd_page,
3107 					num_of_valid_entries);
3108 
3109 		if (dirty_pgds && count > 0)
3110 			*dirty_pgds |= 1ULL << cur_level;
3111 
3112 		phys += count;
3113 		vpfn += count;
3114 		nr -= count;
3115 
3116 		kunmap(p);
3117 	}
3118 
3119 	mutex_unlock(&mmut->mmu_lock);
3120 	return 0;
3121 
3122 fail_unlock:
3123 	mutex_unlock(&mmut->mmu_lock);
3124 	return err;
3125 }
3126 
kbase_mmu_update_pages_common(struct kbase_device * kbdev,struct kbase_context * kctx,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int const group_id)3127 static int kbase_mmu_update_pages_common(struct kbase_device *kbdev, struct kbase_context *kctx,
3128 					 u64 vpfn, struct tagged_addr *phys, size_t nr,
3129 					 unsigned long flags, int const group_id)
3130 {
3131 	int err;
3132 	struct kbase_mmu_hw_op_param op_param;
3133 	u64 dirty_pgds = 0;
3134 	struct kbase_mmu_table *mmut;
3135 	/* Calls to this function are inherently asynchronous, with respect to
3136 	 * MMU operations.
3137 	 */
3138 	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;
3139 	int as_nr;
3140 
3141 #if !MALI_USE_CSF
3142 	if (unlikely(kctx == NULL))
3143 		return -EINVAL;
3144 
3145 	as_nr = kctx->as_nr;
3146 	mmut = &kctx->mmu;
3147 #else
3148 	if (kctx) {
3149 		mmut = &kctx->mmu;
3150 		as_nr = kctx->as_nr;
3151 	} else {
3152 		mmut = &kbdev->csf.mcu_mmu;
3153 		as_nr = MCU_AS_NR;
3154 	}
3155 #endif
3156 
3157 	err = kbase_mmu_update_pages_no_flush(kbdev, mmut, vpfn, phys, nr, flags, group_id,
3158 					      &dirty_pgds);
3159 
3160 	op_param = (const struct kbase_mmu_hw_op_param){
3161 		.vpfn = vpfn,
3162 		.nr = nr,
3163 		.op = KBASE_MMU_OP_FLUSH_MEM,
3164 		.kctx_id = kctx ? kctx->id : 0xFFFFFFFF,
3165 		.mmu_sync_info = mmu_sync_info,
3166 		.flush_skip_levels = pgd_level_to_skip_flush(dirty_pgds),
3167 	};
3168 
3169 	if (mmu_flush_cache_on_gpu_ctrl(kbdev))
3170 		mmu_flush_invalidate_on_gpu_ctrl(kbdev, kctx, as_nr, &op_param);
3171 	else
3172 		mmu_flush_invalidate(kbdev, kctx, as_nr, &op_param);
3173 
3174 	return err;
3175 }
3176 
kbase_mmu_update_pages(struct kbase_context * kctx,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int const group_id)3177 int kbase_mmu_update_pages(struct kbase_context *kctx, u64 vpfn, struct tagged_addr *phys,
3178 			   size_t nr, unsigned long flags, int const group_id)
3179 {
3180 	if (unlikely(kctx == NULL))
3181 		return -EINVAL;
3182 
3183 	return kbase_mmu_update_pages_common(kctx->kbdev, kctx, vpfn, phys, nr, flags, group_id);
3184 }
3185 
3186 #if MALI_USE_CSF
kbase_mmu_update_csf_mcu_pages(struct kbase_device * kbdev,u64 vpfn,struct tagged_addr * phys,size_t nr,unsigned long flags,int const group_id)3187 int kbase_mmu_update_csf_mcu_pages(struct kbase_device *kbdev, u64 vpfn, struct tagged_addr *phys,
3188 				   size_t nr, unsigned long flags, int const group_id)
3189 {
3190 	return kbase_mmu_update_pages_common(kbdev, NULL, vpfn, phys, nr, flags, group_id);
3191 }
3192 #endif /* MALI_USE_CSF */
3193 
mmu_page_migration_transaction_begin(struct kbase_device * kbdev)3194 static void mmu_page_migration_transaction_begin(struct kbase_device *kbdev)
3195 {
3196 	lockdep_assert_held(&kbdev->hwaccess_lock);
3197 
3198 	WARN_ON_ONCE(kbdev->mmu_page_migrate_in_progress);
3199 	kbdev->mmu_page_migrate_in_progress = true;
3200 }
3201 
mmu_page_migration_transaction_end(struct kbase_device * kbdev)3202 static void mmu_page_migration_transaction_end(struct kbase_device *kbdev)
3203 {
3204 	lockdep_assert_held(&kbdev->hwaccess_lock);
3205 	WARN_ON_ONCE(!kbdev->mmu_page_migrate_in_progress);
3206 	kbdev->mmu_page_migrate_in_progress = false;
3207 	/* Invoke the PM state machine, as the MMU page migration session
3208 	 * may have deferred a transition in L2 state machine.
3209 	 */
3210 	kbase_pm_update_state(kbdev);
3211 }
3212 
kbase_mmu_migrate_page(struct tagged_addr old_phys,struct tagged_addr new_phys,dma_addr_t old_dma_addr,dma_addr_t new_dma_addr,int level)3213 int kbase_mmu_migrate_page(struct tagged_addr old_phys, struct tagged_addr new_phys,
3214 			   dma_addr_t old_dma_addr, dma_addr_t new_dma_addr, int level)
3215 {
3216 	struct kbase_page_metadata *page_md = kbase_page_private(as_page(old_phys));
3217 	struct kbase_mmu_hw_op_param op_param;
3218 	struct kbase_mmu_table *mmut = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
3219 					       page_md->data.mapped.mmut :
3220 					       page_md->data.pt_mapped.mmut;
3221 	struct kbase_device *kbdev;
3222 	phys_addr_t pgd;
3223 	u64 *old_page, *new_page, *pgd_page, *target, vpfn;
3224 	int index, check_state, ret = 0;
3225 	unsigned long hwaccess_flags = 0;
3226 	unsigned int num_of_valid_entries;
3227 	u8 vmap_count = 0;
3228 
3229 	/* Due to the hard binding of mmu_command_instr with kctx_id via kbase_mmu_hw_op_param,
3230 	 * here we skip the no kctx case, which is only used with MCU's mmut.
3231 	 */
3232 	if (!mmut->kctx)
3233 		return -EINVAL;
3234 
3235 	if (level > MIDGARD_MMU_BOTTOMLEVEL)
3236 		return -EINVAL;
3237 	else if (level == MIDGARD_MMU_BOTTOMLEVEL)
3238 		vpfn = page_md->data.mapped.vpfn;
3239 	else
3240 		vpfn = PGD_VPFN_LEVEL_GET_VPFN(page_md->data.pt_mapped.pgd_vpfn_level);
3241 
3242 	kbdev = mmut->kctx->kbdev;
3243 	index = (vpfn >> ((3 - level) * 9)) & 0x1FF;
3244 
3245 	/* Create all mappings before copying content.
3246 	 * This is done as early as possible because is the only operation that may
3247 	 * fail. It is possible to do this before taking any locks because the
3248 	 * pages to migrate are not going to change and even the parent PGD is not
3249 	 * going to be affected by any other concurrent operation, since the page
3250 	 * has been isolated before migration and therefore it cannot disappear in
3251 	 * the middle of this function.
3252 	 */
3253 	old_page = kmap(as_page(old_phys));
3254 	if (!old_page) {
3255 		dev_warn(kbdev->dev, "%s: kmap failure for old page.", __func__);
3256 		ret = -EINVAL;
3257 		goto old_page_map_error;
3258 	}
3259 
3260 	new_page = kmap(as_page(new_phys));
3261 	if (!new_page) {
3262 		dev_warn(kbdev->dev, "%s: kmap failure for new page.", __func__);
3263 		ret = -EINVAL;
3264 		goto new_page_map_error;
3265 	}
3266 
3267 	/* GPU cache maintenance affects both memory content and page table,
3268 	 * but at two different stages. A single virtual memory page is affected
3269 	 * by the migration.
3270 	 *
3271 	 * Notice that the MMU maintenance is done in the following steps:
3272 	 *
3273 	 * 1) The MMU region is locked without performing any other operation.
3274 	 *    This lock must cover the entire migration process, in order to
3275 	 *    prevent any GPU access to the virtual page whose physical page
3276 	 *    is being migrated.
3277 	 * 2) Immediately after locking: the MMU region content is flushed via
3278 	 *    GPU control while the lock is taken and without unlocking.
3279 	 *    The region must stay locked for the duration of the whole page
3280 	 *    migration procedure.
3281 	 *    This is necessary to make sure that pending writes to the old page
3282 	 *    are finalized before copying content to the new page.
3283 	 * 3) Before unlocking: changes to the page table are flushed.
3284 	 *    Finer-grained GPU control operations are used if possible, otherwise
3285 	 *    the whole GPU cache shall be flushed again.
3286 	 *    This is necessary to make sure that the GPU accesses the new page
3287 	 *    after migration.
3288 	 * 4) The MMU region is unlocked.
3289 	 */
3290 #define PGD_VPFN_MASK(level) (~((((u64)1) << ((3 - level) * 9)) - 1))
3291 	op_param.mmu_sync_info = CALLER_MMU_ASYNC;
3292 	op_param.kctx_id = mmut->kctx->id;
3293 	op_param.vpfn = vpfn & PGD_VPFN_MASK(level);
3294 	op_param.nr = 1 << ((3 - level) * 9);
3295 	op_param.op = KBASE_MMU_OP_FLUSH_PT;
3296 	/* When level is not MIDGARD_MMU_BOTTOMLEVEL, it is assumed PGD page migration */
3297 	op_param.flush_skip_levels = (level == MIDGARD_MMU_BOTTOMLEVEL) ?
3298 					     pgd_level_to_skip_flush(1ULL << level) :
3299 					     pgd_level_to_skip_flush(3ULL << level);
3300 
3301 	mutex_lock(&mmut->mmu_lock);
3302 
3303 	/* The state was evaluated before entering this function, but it could
3304 	 * have changed before the mmu_lock was taken. However, the state
3305 	 * transitions which are possible at this point are only two, and in both
3306 	 * cases it is a stable state progressing to a "free in progress" state.
3307 	 *
3308 	 * After taking the mmu_lock the state can no longer change: read it again
3309 	 * and make sure that it hasn't changed before continuing.
3310 	 */
3311 	spin_lock(&page_md->migrate_lock);
3312 	check_state = PAGE_STATUS_GET(page_md->status);
3313 	if (level == MIDGARD_MMU_BOTTOMLEVEL)
3314 		vmap_count = page_md->vmap_count;
3315 	spin_unlock(&page_md->migrate_lock);
3316 
3317 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3318 		if (check_state != ALLOCATED_MAPPED) {
3319 			dev_dbg(kbdev->dev,
3320 				"%s: state changed to %d (was %d), abort page migration", __func__,
3321 				check_state, ALLOCATED_MAPPED);
3322 			ret = -EAGAIN;
3323 			goto page_state_change_out;
3324 		} else if (vmap_count > 0) {
3325 			dev_dbg(kbdev->dev, "%s: page was multi-mapped, abort page migration",
3326 				__func__);
3327 			ret = -EAGAIN;
3328 			goto page_state_change_out;
3329 		}
3330 	} else {
3331 		if (check_state != PT_MAPPED) {
3332 			dev_dbg(kbdev->dev,
3333 				"%s: state changed to %d (was %d), abort PGD page migration",
3334 				__func__, check_state, PT_MAPPED);
3335 			WARN_ON_ONCE(check_state != FREE_PT_ISOLATED_IN_PROGRESS);
3336 			ret = -EAGAIN;
3337 			goto page_state_change_out;
3338 		}
3339 	}
3340 
3341 	ret = mmu_get_pgd_at_level(kbdev, mmut, vpfn, level, &pgd);
3342 	if (ret) {
3343 		dev_err(kbdev->dev, "%s: failed to find PGD for old page.", __func__);
3344 		goto get_pgd_at_level_error;
3345 	}
3346 
3347 	pgd_page = kmap(phys_to_page(pgd));
3348 	if (!pgd_page) {
3349 		dev_warn(kbdev->dev, "%s: kmap failure for PGD page.", __func__);
3350 		ret = -EINVAL;
3351 		goto pgd_page_map_error;
3352 	}
3353 
3354 	mutex_lock(&kbdev->pm.lock);
3355 	mutex_lock(&kbdev->mmu_hw_mutex);
3356 
3357 	/* Lock MMU region and flush GPU cache by using GPU control,
3358 	 * in order to keep MMU region locked.
3359 	 */
3360 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3361 	if (unlikely(!kbase_pm_l2_allow_mmu_page_migration(kbdev))) {
3362 		/* Defer the migration as L2 is in a transitional phase */
3363 		spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3364 		mutex_unlock(&kbdev->mmu_hw_mutex);
3365 		mutex_unlock(&kbdev->pm.lock);
3366 		dev_dbg(kbdev->dev, "%s: L2 in transtion, abort PGD page migration", __func__);
3367 		ret = -EAGAIN;
3368 		goto l2_state_defer_out;
3369 	}
3370 	/* Prevent transitional phases in L2 by starting the transaction */
3371 	mmu_page_migration_transaction_begin(kbdev);
3372 	if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
3373 		int as_nr = mmut->kctx->as_nr;
3374 		struct kbase_as *as = &kbdev->as[as_nr];
3375 
3376 		ret = kbase_mmu_hw_do_lock(kbdev, as, &op_param);
3377 		if (!ret) {
3378 				ret = kbase_gpu_cache_flush_and_busy_wait(
3379 					kbdev, GPU_COMMAND_CACHE_CLN_INV_L2_LSC);
3380 		}
3381 		if (ret)
3382 			mmu_page_migration_transaction_end(kbdev);
3383 	}
3384 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3385 
3386 	if (ret < 0) {
3387 		mutex_unlock(&kbdev->mmu_hw_mutex);
3388 		mutex_unlock(&kbdev->pm.lock);
3389 		dev_err(kbdev->dev, "%s: failed to lock MMU region or flush GPU cache", __func__);
3390 		goto undo_mappings;
3391 	}
3392 
3393 	/* Copy memory content.
3394 	 *
3395 	 * It is necessary to claim the ownership of the DMA buffer for the old
3396 	 * page before performing the copy, to make sure of reading a consistent
3397 	 * version of its content, before copying. After the copy, ownership of
3398 	 * the DMA buffer for the new page is given to the GPU in order to make
3399 	 * the content visible to potential GPU access that may happen as soon as
3400 	 * this function releases the lock on the MMU region.
3401 	 */
3402 	dma_sync_single_for_cpu(kbdev->dev, old_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
3403 	memcpy(new_page, old_page, PAGE_SIZE);
3404 	dma_sync_single_for_device(kbdev->dev, new_dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
3405 
3406 	/* Remap GPU virtual page.
3407 	 *
3408 	 * This code rests on the assumption that page migration is only enabled
3409 	 * for 4 kB pages, that necessarily live in the bottom level of the MMU
3410 	 * page table. For this reason, the PGD level tells us inequivocably
3411 	 * whether the page being migrated is a "content page" or another PGD
3412 	 * of the page table:
3413 	 *
3414 	 * - Bottom level implies ATE (Address Translation Entry)
3415 	 * - Any other level implies PTE (Page Table Entry)
3416 	 *
3417 	 * The current implementation doesn't handle the case of a level 0 PGD,
3418 	 * that is: the root PGD of the page table.
3419 	 */
3420 	target = &pgd_page[index];
3421 
3422 	/* Certain entries of a page table page encode the count of valid entries
3423 	 * present in that page. So need to save & restore the count information
3424 	 * when updating the PTE/ATE to point to the new page.
3425 	 */
3426 	num_of_valid_entries = kbdev->mmu_mode->get_num_valid_entries(pgd_page);
3427 
3428 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3429 		WARN_ON_ONCE((*target & 1UL) == 0);
3430 		*target =
3431 			kbase_mmu_create_ate(kbdev, new_phys, page_md->data.mapped.reg->flags,
3432 					     level, page_md->data.mapped.reg->gpu_alloc->group_id);
3433 	} else {
3434 		u64 managed_pte;
3435 
3436 #ifdef CONFIG_MALI_BIFROST_DEBUG
3437 		/* The PTE should be pointing to the page being migrated */
3438 		WARN_ON_ONCE(as_phys_addr_t(old_phys) != kbdev->mmu_mode->pte_to_phy_addr(
3439 			kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
3440 				kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, pgd_page[index])));
3441 #endif
3442 		kbdev->mmu_mode->entry_set_pte(&managed_pte, as_phys_addr_t(new_phys));
3443 		*target = kbdev->mgm_dev->ops.mgm_update_gpu_pte(
3444 			kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP, level, managed_pte);
3445 	}
3446 
3447 	kbdev->mmu_mode->set_num_valid_entries(pgd_page, num_of_valid_entries);
3448 
3449 	/* This function always updates a single entry inside an existing PGD,
3450 	 * therefore cache maintenance is necessary and affects a single entry.
3451 	 */
3452 	kbase_mmu_sync_pgd(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
3453 			   kbase_dma_addr(phys_to_page(pgd)) + (index * sizeof(u64)), sizeof(u64),
3454 			   KBASE_MMU_OP_FLUSH_PT);
3455 
3456 	/* Unlock MMU region.
3457 	 *
3458 	 * Notice that GPUs which don't issue flush commands via GPU control
3459 	 * still need an additional GPU cache flush here, this time only
3460 	 * for the page table, because the function call above to sync PGDs
3461 	 * won't have any effect on them.
3462 	 */
3463 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3464 	if (kbdev->pm.backend.gpu_powered && mmut->kctx->as_nr >= 0) {
3465 		int as_nr = mmut->kctx->as_nr;
3466 		struct kbase_as *as = &kbdev->as[as_nr];
3467 
3468 		if (mmu_flush_cache_on_gpu_ctrl(kbdev)) {
3469 			ret = kbase_mmu_hw_do_unlock(kbdev, as, &op_param);
3470 		} else {
3471 			ret = kbase_gpu_cache_flush_and_busy_wait(kbdev,
3472 								  GPU_COMMAND_CACHE_CLN_INV_L2);
3473 			if (!ret)
3474 				ret = kbase_mmu_hw_do_unlock_no_addr(kbdev, as, &op_param);
3475 		}
3476 	}
3477 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3478 	/* Releasing locks before checking the migration transaction error state */
3479 	mutex_unlock(&kbdev->mmu_hw_mutex);
3480 	mutex_unlock(&kbdev->pm.lock);
3481 
3482 	spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
3483 	/* Release the transition prevention in L2 by ending the transaction */
3484 	mmu_page_migration_transaction_end(kbdev);
3485 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, hwaccess_flags);
3486 
3487 	/* Checking the final migration transaction error state */
3488 	if (ret < 0) {
3489 		dev_err(kbdev->dev, "%s: failed to unlock MMU region.", __func__);
3490 		goto undo_mappings;
3491 	}
3492 
3493 	/* Undertaking metadata transfer, while we are holding the mmu_lock */
3494 	spin_lock(&page_md->migrate_lock);
3495 	if (level == MIDGARD_MMU_BOTTOMLEVEL) {
3496 		size_t page_array_index =
3497 			page_md->data.mapped.vpfn - page_md->data.mapped.reg->start_pfn;
3498 
3499 		WARN_ON(PAGE_STATUS_GET(page_md->status) != ALLOCATED_MAPPED);
3500 
3501 		/* Replace page in array of pages of the physical allocation. */
3502 		page_md->data.mapped.reg->gpu_alloc->pages[page_array_index] = new_phys;
3503 	}
3504 	/* Update the new page dma_addr with the transferred metadata from the old_page */
3505 	page_md->dma_addr = new_dma_addr;
3506 	page_md->status = PAGE_ISOLATE_SET(page_md->status, 0);
3507 	spin_unlock(&page_md->migrate_lock);
3508 	set_page_private(as_page(new_phys), (unsigned long)page_md);
3509 	/* Old page metatdata pointer cleared as it now owned by the new page */
3510 	set_page_private(as_page(old_phys), 0);
3511 
3512 l2_state_defer_out:
3513 	kunmap(phys_to_page(pgd));
3514 pgd_page_map_error:
3515 get_pgd_at_level_error:
3516 page_state_change_out:
3517 	mutex_unlock(&mmut->mmu_lock);
3518 
3519 	kunmap(as_page(new_phys));
3520 new_page_map_error:
3521 	kunmap(as_page(old_phys));
3522 old_page_map_error:
3523 	return ret;
3524 
3525 undo_mappings:
3526 	/* Unlock the MMU table and undo mappings. */
3527 	mutex_unlock(&mmut->mmu_lock);
3528 	kunmap(phys_to_page(pgd));
3529 	kunmap(as_page(new_phys));
3530 	kunmap(as_page(old_phys));
3531 
3532 	return ret;
3533 }
3534 
mmu_teardown_level(struct kbase_device * kbdev,struct kbase_mmu_table * mmut,phys_addr_t pgd,unsigned int level)3535 static void mmu_teardown_level(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
3536 			       phys_addr_t pgd, unsigned int level)
3537 {
3538 	u64 *pgd_page;
3539 	int i;
3540 	struct memory_group_manager_device *mgm_dev = kbdev->mgm_dev;
3541 	struct kbase_mmu_mode const *mmu_mode = kbdev->mmu_mode;
3542 	u64 *pgd_page_buffer = NULL;
3543 	struct page *p = phys_to_page(pgd);
3544 
3545 	lockdep_assert_held(&mmut->mmu_lock);
3546 
3547 	pgd_page = kmap_atomic(p);
3548 	/* kmap_atomic should NEVER fail. */
3549 	if (WARN_ON_ONCE(pgd_page == NULL))
3550 		return;
3551 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
3552 		/* Copy the page to our preallocated buffer so that we can minimize
3553 		 * kmap_atomic usage
3554 		 */
3555 		pgd_page_buffer = mmut->scratch_mem.teardown_pages.levels[level];
3556 		memcpy(pgd_page_buffer, pgd_page, PAGE_SIZE);
3557 	}
3558 
3559 	/* When page migration is enabled, kbase_region_tracker_term() would ensure
3560 	 * there are no pages left mapped on the GPU for a context. Hence the count
3561 	 * of valid entries is expected to be zero here.
3562 	 */
3563 	if (kbase_page_migration_enabled && mmut->kctx)
3564 		WARN_ON_ONCE(kbdev->mmu_mode->get_num_valid_entries(pgd_page));
3565 	/* Invalidate page after copying */
3566 	mmu_mode->entries_invalidate(pgd_page, KBASE_MMU_PAGE_ENTRIES);
3567 	kunmap_atomic(pgd_page);
3568 	pgd_page = pgd_page_buffer;
3569 
3570 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
3571 		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
3572 			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
3573 				phys_addr_t target_pgd = mmu_mode->pte_to_phy_addr(
3574 					mgm_dev->ops.mgm_pte_to_original_pte(mgm_dev,
3575 									     MGM_DEFAULT_PTE_GROUP,
3576 									     level, pgd_page[i]));
3577 
3578 				mmu_teardown_level(kbdev, mmut, target_pgd, level + 1);
3579 			}
3580 		}
3581 	}
3582 
3583 	kbase_mmu_free_pgd(kbdev, mmut, pgd);
3584 }
3585 
kbase_mmu_init(struct kbase_device * const kbdev,struct kbase_mmu_table * const mmut,struct kbase_context * const kctx,int const group_id)3586 int kbase_mmu_init(struct kbase_device *const kbdev,
3587 	struct kbase_mmu_table *const mmut, struct kbase_context *const kctx,
3588 	int const group_id)
3589 {
3590 	if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS) ||
3591 	    WARN_ON(group_id < 0))
3592 		return -EINVAL;
3593 
3594 	compiletime_assert(KBASE_MEM_ALLOC_MAX_SIZE <= (((8ull << 30) >> PAGE_SHIFT)),
3595 			   "List of free PGDs may not be large enough.");
3596 	compiletime_assert(MAX_PAGES_FOR_FREE_PGDS >= MIDGARD_MMU_BOTTOMLEVEL,
3597 			   "Array of MMU levels is not large enough.");
3598 
3599 	mmut->group_id = group_id;
3600 	mutex_init(&mmut->mmu_lock);
3601 	mmut->kctx = kctx;
3602 	mmut->pgd = KBASE_MMU_INVALID_PGD_ADDRESS;
3603 
3604 	/* We allocate pages into the kbdev memory pool, then
3605 	 * kbase_mmu_alloc_pgd will allocate out of that pool. This is done to
3606 	 * avoid allocations from the kernel happening with the lock held.
3607 	 */
3608 	while (mmut->pgd == KBASE_MMU_INVALID_PGD_ADDRESS) {
3609 		int err;
3610 
3611 		err = kbase_mem_pool_grow(
3612 			&kbdev->mem_pools.small[mmut->group_id],
3613 			MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
3614 		if (err) {
3615 			kbase_mmu_term(kbdev, mmut);
3616 			return -ENOMEM;
3617 		}
3618 
3619 		mutex_lock(&mmut->mmu_lock);
3620 		mmut->pgd = kbase_mmu_alloc_pgd(kbdev, mmut);
3621 		mutex_unlock(&mmut->mmu_lock);
3622 	}
3623 
3624 	return 0;
3625 }
3626 
kbase_mmu_term(struct kbase_device * kbdev,struct kbase_mmu_table * mmut)3627 void kbase_mmu_term(struct kbase_device *kbdev, struct kbase_mmu_table *mmut)
3628 {
3629 	WARN((mmut->kctx) && (mmut->kctx->as_nr != KBASEP_AS_NR_INVALID),
3630 	     "kctx-%d_%d must first be scheduled out to flush GPU caches+tlbs before tearing down MMU tables",
3631 	     mmut->kctx->tgid, mmut->kctx->id);
3632 
3633 	if (mmut->pgd != KBASE_MMU_INVALID_PGD_ADDRESS) {
3634 		mutex_lock(&mmut->mmu_lock);
3635 		mmu_teardown_level(kbdev, mmut, mmut->pgd, MIDGARD_MMU_TOPLEVEL);
3636 		mutex_unlock(&mmut->mmu_lock);
3637 
3638 		if (mmut->kctx)
3639 			KBASE_TLSTREAM_AUX_PAGESALLOC(kbdev, mmut->kctx->id, 0);
3640 	}
3641 
3642 	mutex_destroy(&mmut->mmu_lock);
3643 }
3644 
kbase_mmu_as_term(struct kbase_device * kbdev,unsigned int i)3645 void kbase_mmu_as_term(struct kbase_device *kbdev, unsigned int i)
3646 {
3647 	destroy_workqueue(kbdev->as[i].pf_wq);
3648 }
3649 
kbase_mmu_flush_pa_range(struct kbase_device * kbdev,struct kbase_context * kctx,phys_addr_t phys,size_t size,enum kbase_mmu_op_type flush_op)3650 void kbase_mmu_flush_pa_range(struct kbase_device *kbdev, struct kbase_context *kctx,
3651 			      phys_addr_t phys, size_t size,
3652 			      enum kbase_mmu_op_type flush_op)
3653 {
3654 #if MALI_USE_CSF
3655 	unsigned long irq_flags;
3656 
3657 	spin_lock_irqsave(&kbdev->hwaccess_lock, irq_flags);
3658 	if (mmu_flush_cache_on_gpu_ctrl(kbdev) && (flush_op != KBASE_MMU_OP_NONE) &&
3659 	    kbdev->pm.backend.gpu_powered && (!kctx || kctx->as_nr >= 0))
3660 		mmu_flush_pa_range(kbdev, phys, size, KBASE_MMU_OP_FLUSH_PT);
3661 	spin_unlock_irqrestore(&kbdev->hwaccess_lock, irq_flags);
3662 #endif
3663 }
3664 
3665 #ifdef CONFIG_MALI_VECTOR_DUMP
kbasep_mmu_dump_level(struct kbase_context * kctx,phys_addr_t pgd,int level,char ** const buffer,size_t * size_left)3666 static size_t kbasep_mmu_dump_level(struct kbase_context *kctx, phys_addr_t pgd,
3667 		int level, char ** const buffer, size_t *size_left)
3668 {
3669 	phys_addr_t target_pgd;
3670 	u64 *pgd_page;
3671 	int i;
3672 	size_t size = KBASE_MMU_PAGE_ENTRIES * sizeof(u64) + sizeof(u64);
3673 	size_t dump_size;
3674 	struct kbase_device *kbdev;
3675 	struct kbase_mmu_mode const *mmu_mode;
3676 
3677 	if (WARN_ON(kctx == NULL))
3678 		return 0;
3679 	lockdep_assert_held(&kctx->mmu.mmu_lock);
3680 
3681 	kbdev = kctx->kbdev;
3682 	mmu_mode = kbdev->mmu_mode;
3683 
3684 	pgd_page = kmap(pfn_to_page(PFN_DOWN(pgd)));
3685 	if (!pgd_page) {
3686 		dev_warn(kbdev->dev, "%s: kmap failure", __func__);
3687 		return 0;
3688 	}
3689 
3690 	if (*size_left >= size) {
3691 		/* A modified physical address that contains
3692 		 * the page table level
3693 		 */
3694 		u64 m_pgd = pgd | level;
3695 
3696 		/* Put the modified physical address in the output buffer */
3697 		memcpy(*buffer, &m_pgd, sizeof(m_pgd));
3698 		*buffer += sizeof(m_pgd);
3699 
3700 		/* Followed by the page table itself */
3701 		memcpy(*buffer, pgd_page, sizeof(u64) * KBASE_MMU_PAGE_ENTRIES);
3702 		*buffer += sizeof(u64) * KBASE_MMU_PAGE_ENTRIES;
3703 
3704 		*size_left -= size;
3705 	}
3706 
3707 	if (level < MIDGARD_MMU_BOTTOMLEVEL) {
3708 		for (i = 0; i < KBASE_MMU_PAGE_ENTRIES; i++) {
3709 			if (mmu_mode->pte_is_valid(pgd_page[i], level)) {
3710 				target_pgd = mmu_mode->pte_to_phy_addr(
3711 					kbdev->mgm_dev->ops.mgm_pte_to_original_pte(
3712 						kbdev->mgm_dev, MGM_DEFAULT_PTE_GROUP,
3713 						level, pgd_page[i]));
3714 
3715 				dump_size = kbasep_mmu_dump_level(kctx,
3716 						target_pgd, level + 1,
3717 						buffer, size_left);
3718 				if (!dump_size) {
3719 					kunmap(pfn_to_page(PFN_DOWN(pgd)));
3720 					return 0;
3721 				}
3722 				size += dump_size;
3723 			}
3724 		}
3725 	}
3726 
3727 	kunmap(pfn_to_page(PFN_DOWN(pgd)));
3728 
3729 	return size;
3730 }
3731 
kbase_mmu_dump(struct kbase_context * kctx,int nr_pages)3732 void *kbase_mmu_dump(struct kbase_context *kctx, int nr_pages)
3733 {
3734 	void *kaddr;
3735 	size_t size_left;
3736 
3737 	KBASE_DEBUG_ASSERT(kctx);
3738 
3739 	if (nr_pages == 0) {
3740 		/* can't dump in a 0 sized buffer, early out */
3741 		return NULL;
3742 	}
3743 
3744 	size_left = nr_pages * PAGE_SIZE;
3745 
3746 	if (WARN_ON(size_left == 0))
3747 		return NULL;
3748 	kaddr = vmalloc_user(size_left);
3749 
3750 	mutex_lock(&kctx->mmu.mmu_lock);
3751 
3752 	if (kaddr) {
3753 		u64 end_marker = 0xFFULL;
3754 		char *buffer;
3755 		char *mmu_dump_buffer;
3756 		u64 config[3];
3757 		size_t dump_size, size = 0;
3758 		struct kbase_mmu_setup as_setup;
3759 
3760 		buffer = (char *)kaddr;
3761 		mmu_dump_buffer = buffer;
3762 
3763 		kctx->kbdev->mmu_mode->get_as_setup(&kctx->mmu,
3764 				&as_setup);
3765 		config[0] = as_setup.transtab;
3766 		config[1] = as_setup.memattr;
3767 		config[2] = as_setup.transcfg;
3768 		memcpy(buffer, &config, sizeof(config));
3769 		mmu_dump_buffer += sizeof(config);
3770 		size_left -= sizeof(config);
3771 		size += sizeof(config);
3772 
3773 		dump_size = kbasep_mmu_dump_level(kctx,
3774 				kctx->mmu.pgd,
3775 				MIDGARD_MMU_TOPLEVEL,
3776 				&mmu_dump_buffer,
3777 				&size_left);
3778 
3779 		if (!dump_size)
3780 			goto fail_free;
3781 
3782 		size += dump_size;
3783 
3784 		/* Add on the size for the end marker */
3785 		size += sizeof(u64);
3786 
3787 		if (size > (nr_pages * PAGE_SIZE)) {
3788 			/* The buffer isn't big enough - free the memory and
3789 			 * return failure
3790 			 */
3791 			goto fail_free;
3792 		}
3793 
3794 		/* Add the end marker */
3795 		memcpy(mmu_dump_buffer, &end_marker, sizeof(u64));
3796 	}
3797 
3798 	mutex_unlock(&kctx->mmu.mmu_lock);
3799 	return kaddr;
3800 
3801 fail_free:
3802 	vfree(kaddr);
3803 	mutex_unlock(&kctx->mmu.mmu_lock);
3804 	return NULL;
3805 }
3806 KBASE_EXPORT_TEST_API(kbase_mmu_dump);
3807 #endif /* CONFIG_MALI_VECTOR_DUMP */
3808 
kbase_mmu_bus_fault_worker(struct work_struct * data)3809 void kbase_mmu_bus_fault_worker(struct work_struct *data)
3810 {
3811 	struct kbase_as *faulting_as;
3812 	int as_no;
3813 	struct kbase_context *kctx;
3814 	struct kbase_device *kbdev;
3815 	struct kbase_fault *fault;
3816 
3817 	faulting_as = container_of(data, struct kbase_as, work_busfault);
3818 	fault = &faulting_as->bf_data;
3819 
3820 	/* Ensure that any pending page fault worker has completed */
3821 	flush_work(&faulting_as->work_pagefault);
3822 
3823 	as_no = faulting_as->number;
3824 
3825 	kbdev = container_of(faulting_as, struct kbase_device, as[as_no]);
3826 
3827 	/* Grab the context, already refcounted in kbase_mmu_interrupt() on
3828 	 * flagging of the bus-fault. Therefore, it cannot be scheduled out of
3829 	 * this AS until we explicitly release it
3830 	 */
3831 	kctx = kbase_ctx_sched_as_to_ctx(kbdev, as_no);
3832 	if (!kctx) {
3833 		atomic_dec(&kbdev->faults_pending);
3834 		return;
3835 	}
3836 
3837 #ifdef CONFIG_MALI_ARBITER_SUPPORT
3838 	/* check if we still have GPU */
3839 	if (unlikely(kbase_is_gpu_removed(kbdev))) {
3840 		dev_dbg(kbdev->dev, "%s: GPU has been removed", __func__);
3841 		release_ctx(kbdev, kctx);
3842 		atomic_dec(&kbdev->faults_pending);
3843 		return;
3844 	}
3845 #endif
3846 
3847 	if (unlikely(fault->protected_mode)) {
3848 		kbase_mmu_report_fault_and_kill(kctx, faulting_as,
3849 				"Permission failure", fault);
3850 		kbase_mmu_hw_clear_fault(kbdev, faulting_as,
3851 				KBASE_MMU_FAULT_TYPE_BUS_UNEXPECTED);
3852 		release_ctx(kbdev, kctx);
3853 		atomic_dec(&kbdev->faults_pending);
3854 		return;
3855 
3856 	}
3857 
3858 #if MALI_USE_CSF
3859 	/* Before the GPU power off, wait is done for the completion of
3860 	 * in-flight MMU fault work items. So GPU is expected to remain
3861 	 * powered up whilst the bus fault handling is being done.
3862 	 */
3863 	kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
3864 #else
3865 	/* NOTE: If GPU already powered off for suspend,
3866 	 * we don't need to switch to unmapped
3867 	 */
3868 	if (!kbase_pm_context_active_handle_suspend(kbdev,
3869 				KBASE_PM_SUSPEND_HANDLER_DONT_REACTIVATE)) {
3870 		kbase_gpu_report_bus_fault_and_kill(kctx, faulting_as, fault);
3871 		kbase_pm_context_idle(kbdev);
3872 	}
3873 #endif
3874 
3875 	release_ctx(kbdev, kctx);
3876 
3877 	atomic_dec(&kbdev->faults_pending);
3878 }
3879 
kbase_flush_mmu_wqs(struct kbase_device * kbdev)3880 void kbase_flush_mmu_wqs(struct kbase_device *kbdev)
3881 {
3882 	int i;
3883 
3884 	for (i = 0; i < kbdev->nr_hw_address_spaces; i++) {
3885 		struct kbase_as *as = &kbdev->as[i];
3886 
3887 		flush_workqueue(as->pf_wq);
3888 	}
3889 }
3890