1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /*
3 *
4 * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
5 *
6 * This program is free software and is provided to you under the terms of the
7 * GNU General Public License version 2 as published by the Free Software
8 * Foundation, and any use by you of this program is subject to the terms
9 * of such GNU license.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, you can access it online at
18 * http://www.gnu.org/licenses/gpl-2.0.html.
19 *
20 */
21
22 /**
23 * DOC: Definitions (types, defines, etc.) common to Kbase. They are placed here
24 * to allow the hierarchy of header files to work.
25 */
26
27 #ifndef _KBASE_DEFS_H_
28 #define _KBASE_DEFS_H_
29
30 #include <mali_kbase_config.h>
31 #include <mali_base_hwconfig_features.h>
32 #include <mali_base_hwconfig_issues.h>
33 #include <mali_kbase_mem_lowlevel.h>
34 #include <mmu/mali_kbase_mmu_hw.h>
35 #include <backend/gpu/mali_kbase_instr_defs.h>
36 #include <mali_kbase_pm.h>
37 #include <mali_kbase_gpuprops_types.h>
38 #include <hwcnt/mali_kbase_hwcnt_watchdog_if.h>
39
40 #if MALI_USE_CSF
41 #include <hwcnt/backend/mali_kbase_hwcnt_backend_csf.h>
42 #else
43 #include <hwcnt/backend/mali_kbase_hwcnt_backend_jm.h>
44 #include <hwcnt/backend/mali_kbase_hwcnt_backend_jm_watchdog.h>
45 #endif
46
47 #include <protected_mode_switcher.h>
48
49 #include <linux/atomic.h>
50 #include <linux/mempool.h>
51 #include <linux/slab.h>
52 #include <linux/file.h>
53 #include <linux/sizes.h>
54
55
56 #include "mali_kbase_fence_defs.h"
57
58 #if IS_ENABLED(CONFIG_DEBUG_FS)
59 #include <linux/debugfs.h>
60 #endif /* CONFIG_DEBUG_FS */
61
62 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
63 #include <linux/devfreq.h>
64 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
65
66 #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
67 #include <linux/devfreq_cooling.h>
68 #endif
69
70 #ifdef CONFIG_MALI_ARBITER_SUPPORT
71 #include <arbiter/mali_kbase_arbiter_defs.h>
72 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
73
74 #include <linux/clk.h>
75 #include <linux/regulator/consumer.h>
76 #include <linux/memory_group_manager.h>
77 #include <soc/rockchip/rockchip_opp_select.h>
78
79 #include "debug/mali_kbase_debug_ktrace_defs.h"
80
81 /** Number of milliseconds before we time out on a GPU soft/hard reset */
82 #define RESET_TIMEOUT 500
83
84 /**
85 * BASE_JM_MAX_NR_SLOTS - The maximum number of Job Slots to support in the Hardware.
86 *
87 * You can optimize this down if your target devices will only ever support a
88 * small number of job slots.
89 */
90 #define BASE_JM_MAX_NR_SLOTS 3
91
92 /**
93 * BASE_MAX_NR_AS - The maximum number of Address Spaces to support in the Hardware.
94 *
95 * You can optimize this down if your target devices will only ever support a
96 * small number of Address Spaces
97 */
98 #define BASE_MAX_NR_AS 16
99
100 /* mmu */
101 #define MIDGARD_MMU_LEVEL(x) (x)
102
103 #define MIDGARD_MMU_TOPLEVEL MIDGARD_MMU_LEVEL(0)
104
105 #define MIDGARD_MMU_BOTTOMLEVEL MIDGARD_MMU_LEVEL(3)
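/*
 * Illustrative sketch (not part of the driver API): the macros above describe
 * a 4-level page table, with MIDGARD_MMU_TOPLEVEL (0) being the top level
 * programmed into the MMU hardware and MIDGARD_MMU_BOTTOMLEVEL (3) holding the
 * final address translation entries. A walk over all levels would look roughly
 * like:
 *
 *   int level;
 *
 *   for (level = MIDGARD_MMU_TOPLEVEL; level <= MIDGARD_MMU_BOTTOMLEVEL; level++)
 *           visit_pgd_level(level); // visit_pgd_level() is a hypothetical helper
 */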
106
107 #define GROWABLE_FLAGS_REQUIRED (KBASE_REG_PF_GROW | KBASE_REG_GPU_WR)
108
109 /** setting in kbase_context::as_nr that indicates it's invalid */
110 #define KBASEP_AS_NR_INVALID (-1)
111
112 /**
113 * KBASE_LOCK_REGION_MAX_SIZE_LOG2 - Maximum size in bytes of a MMU lock region,
114 * as a logarithm
115 */
116 #define KBASE_LOCK_REGION_MAX_SIZE_LOG2 (48) /* 256 TB */
117
118 /**
119 * KBASE_REG_ZONE_MAX - Maximum number of GPU memory region zones
120 */
121 #if MALI_USE_CSF
122 #define KBASE_REG_ZONE_MAX 6ul
123 #else
124 #define KBASE_REG_ZONE_MAX 4ul
125 #endif
126
127 #include "mali_kbase_hwaccess_defs.h"
128
129 /* Maximum number of pages of memory that require a permanent mapping, per
130 * kbase_context
131 */
132 #define KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES ((64 * 1024ul * 1024ul) >> PAGE_SHIFT)
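/*
 * Worked example (assuming a typical 4 KiB page size, i.e. PAGE_SHIFT == 12):
 * KBASE_PERMANENTLY_MAPPED_MEM_LIMIT_PAGES = (64 MiB >> 12) = 16384 pages
 * per kbase_context.
 */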
133 /* Minimum threshold period for hwcnt dumps between different hwcnt virtualizer
134 * clients, to reduce undesired system load.
135 * If a virtualizer client requests a dump within this threshold period after
136 * some other client has performed a dump, a new dump won't be performed and
137 * the accumulated counter values for that client will be returned instead.
138 */
139 #define KBASE_HWCNT_GPU_VIRTUALIZER_DUMP_THRESHOLD_NS (200 * NSEC_PER_USEC)
140
141 #if MALI_USE_CSF
142 /* The buffer count of the CSF hwcnt backend ring buffer, which is used when
143 * the CSF hwcnt backend allocates the ring buffer to communicate with CSF
144 * firmware for HWC dump samples.
145 * To meet the hardware requirement, this number MUST be a power of 2,
146 * otherwise CSF hwcnt backend creation will fail.
147 */
148 #define KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT (128)
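/*
 * A minimal sanity-check sketch (illustrative only, assuming <linux/log2.h>
 * is included): backend initialisation could reject a bad configuration with
 * something like
 *
 *   if (!is_power_of_2(KBASE_HWCNT_BACKEND_CSF_RING_BUFFER_COUNT))
 *           return -EINVAL;
 *
 * The actual check performed by the CSF hwcnt backend is not shown here.
 */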
149 #endif
150
151 /* Maximum number of clock/regulator pairs that may be referenced by
152 * the device node.
153 * This is dependent on support for of_property_read_u64_array() in the
154 * kernel.
155 * Note that the number of clocks may exceed the number of regulators,
156 * as mentioned in power_control_init().
157 */
158 #define BASE_MAX_NR_CLOCKS_REGULATORS (4)
159
160 /* Forward declarations */
161 struct kbase_context;
162 struct kbase_device;
163 struct kbase_as;
164 struct kbase_mmu_setup;
165 struct kbase_kinstr_jm;
166
167 /**
168 * struct kbase_io_access - holds information about 1 register access
169 *
170 * @addr: first bit indicates r/w (r=0, w=1)
171 * @value: value written or read
172 */
173 struct kbase_io_access {
174 uintptr_t addr;
175 u32 value;
176 };
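/*
 * Decoding sketch (illustrative only): per the @addr documentation above, the
 * lowest bit encodes the access direction and the remaining bits the register
 * address.
 *
 *   static inline bool kbase_io_access_is_write(const struct kbase_io_access *io)
 *   {
 *           return (io->addr & 1u) != 0;   // r=0, w=1
 *   }
 *
 * kbase_io_access_is_write() is a hypothetical helper, not part of the driver.
 */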
177
178 /**
179 * struct kbase_io_history - keeps track of all recent register accesses
180 *
181 * @enabled: true if register accesses are recorded, false otherwise
182 * @lock: spinlock protecting kbase_io_access array
183 * @count: number of registers read/written
184 * @size: number of elements in kbase_io_access array
185 * @buf: array of kbase_io_access
186 */
187 struct kbase_io_history {
188 bool enabled;
189
190 spinlock_t lock;
191 size_t count;
192 u16 size;
193 struct kbase_io_access *buf;
194 };
195
196 /**
197 * struct kbase_debug_copy_buffer - information about the buffer to be copied.
198 *
199 * @size: size of the buffer in bytes
200 * @pages: pointer to an array of pointers to the pages which contain
201 * the buffer
202 * @is_vmalloc: true if @pages was allocated with vzalloc. false if @pages was
203 * allocated with kcalloc
204 * @nr_pages: number of pages
205 * @offset: offset into the pages
206 * @gpu_alloc: pointer to physical memory allocated by the GPU
207 * @extres_pages: array of pointers to the pages containing external resources
208 * for this buffer
209 * @nr_extres_pages: number of pages in @extres_pages
210 */
211 struct kbase_debug_copy_buffer {
212 size_t size;
213 struct page **pages;
214 bool is_vmalloc;
215 int nr_pages;
216 size_t offset;
217 struct kbase_mem_phy_alloc *gpu_alloc;
218
219 struct page **extres_pages;
220 int nr_extres_pages;
221 };
222
223 struct kbase_device_info {
224 u32 features;
225 };
226
227 struct kbase_mmu_setup {
228 u64 transtab;
229 u64 memattr;
230 u64 transcfg;
231 };
232
233 /**
234 * struct kbase_fault - object containing data relating to a page or bus fault.
235 * @addr: Records the faulting address.
236 * @extra_addr: Records the secondary fault address.
237 * @status: Records the fault status as reported by Hw.
238 * @protected_mode: Flag indicating whether the fault occurred in protected mode
239 * or not.
240 */
241 struct kbase_fault {
242 u64 addr;
243 u64 extra_addr;
244 u32 status;
245 bool protected_mode;
246 };
247
248 /** Maximum number of memory pages that should be allocated for the array
249 * of pointers to free PGDs.
250 *
251 * This number has been pre-calculated to deal with the maximum allocation
252 * size expressed by the default value of KBASE_MEM_ALLOC_MAX_SIZE.
253 * This is supposed to be enough for almost the entirety of MMU operations.
254 * Any size greater than KBASE_MEM_ALLOC_MAX_SIZE requires being broken down
255 * into multiple iterations, each dealing with at most KBASE_MEM_ALLOC_MAX_SIZE
256 * bytes.
257 *
258 * Please update this value if KBASE_MEM_ALLOC_MAX_SIZE changes.
259 */
260 #define MAX_PAGES_FOR_FREE_PGDS ((size_t)9)
261
262 /* Maximum number of pointers to free PGDs */
263 #define MAX_FREE_PGDS ((PAGE_SIZE / sizeof(struct page *)) * MAX_PAGES_FOR_FREE_PGDS)
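/*
 * Worked example (assuming a 64-bit kernel with 4 KiB pages, so
 * sizeof(struct page *) == 8 and PAGE_SIZE == 4096):
 * MAX_FREE_PGDS = (4096 / 8) * 9 = 4608 PGD pointers.
 */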
264
265 /**
266 * struct kbase_mmu_table - object representing a set of GPU page tables
267 * @mmu_lock: Lock to serialize the accesses made to multi level GPU
268 * page tables
269 * @pgd: Physical address of the page allocated for the top
270 * level page table of the context, this is used for
271 * MMU HW programming as the address translation will
272 * start from the top level page table.
273 * @group_id: A memory group ID to be passed to a platform-specific
274 * memory group manager.
275 * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
276 * @kctx: If this set of MMU tables belongs to a context then
277 * this is a back-reference to the context, otherwise
278 * it is NULL.
279 * @scratch_mem: Scratch memory used for MMU operations, which are
280 * serialized by the @mmu_lock.
281 */
282 struct kbase_mmu_table {
283 struct mutex mmu_lock;
284 phys_addr_t pgd;
285 u8 group_id;
286 struct kbase_context *kctx;
287 union {
288 /**
289 * @teardown_pages: Scratch memory used for backup copies of whole
290 * PGD pages when tearing down levels upon
291 * termination of the MMU table.
292 */
293 struct {
294 /**
295 * @levels: Array of PGD pages, large enough to copy one PGD
296 * for each level of the MMU table.
297 */
298 u64 levels[MIDGARD_MMU_BOTTOMLEVEL][PAGE_SIZE / sizeof(u64)];
299 } teardown_pages;
300 /**
301 * @free_pgds: Scratch memory used for insertion, update and teardown
302 * operations to store a temporary list of PGDs to be freed
303 * at the end of the operation.
304 */
305 struct {
306 /** @pgds: Array of pointers to PGDs to free. */
307 struct page *pgds[MAX_FREE_PGDS];
308 /** @head_index: Index of first free element in the PGDs array. */
309 size_t head_index;
310 } free_pgds;
311 } scratch_mem;
312 };
313
314 /**
315 * struct kbase_reg_zone - Information about GPU memory region zones
316 * @base_pfn: Page Frame Number in GPU virtual address space for the start of
317 * the Zone
318 * @va_size_pages: Size of the Zone in pages
319 *
320 * Tracks information about a zone set up via KBASE_REG_ZONE() and related macros.
321 * In future, this could also store the &rb_root that are currently in
322 * &kbase_context and &kbase_csf_device.
323 */
324 struct kbase_reg_zone {
325 u64 base_pfn;
326 u64 va_size_pages;
327 };
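/*
 * Illustrative sketch (not part of the driver API): the end of a zone in GPU
 * virtual address space follows directly from the two members above.
 *
 *   static inline u64 example_zone_end_pfn(const struct kbase_reg_zone *zone)
 *   {
 *           return zone->base_pfn + zone->va_size_pages;
 *   }
 *
 * example_zone_end_pfn() is a hypothetical helper shown only to clarify the
 * meaning of @base_pfn and @va_size_pages.
 */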
328
329 #if MALI_USE_CSF
330 #include "csf/mali_kbase_csf_defs.h"
331 #else
332 #include "jm/mali_kbase_jm_defs.h"
333 #endif
334
335 #include "mali_kbase_hwaccess_time.h"
336
337 static inline int kbase_as_has_bus_fault(struct kbase_as *as,
338 struct kbase_fault *fault)
339 {
340 return (fault == &as->bf_data);
341 }
342
343 static inline int kbase_as_has_page_fault(struct kbase_as *as,
344 struct kbase_fault *fault)
345 {
346 return (fault == &as->pf_data);
347 }
348
349 /**
350 * struct kbasep_mem_device - Data stored per device for memory allocation
351 *
352 * @used_pages: Tracks usage of OS shared memory. Updated when OS memory is
353 * allocated/freed.
354 * @ir_threshold: Fraction of the maximum size of an allocation that grows
355 * on GPU page fault that can be used before the driver
356 * switches to incremental rendering, in 1/256ths.
357 * 0 means disabled.
358 */
359 struct kbasep_mem_device {
360 atomic_t used_pages;
361 atomic_t ir_threshold;
362 };
363
364 struct kbase_clk_rate_listener;
365
366 /**
367 * typedef kbase_clk_rate_listener_on_change_t() - Frequency change callback
368 *
369 * @listener: Clock frequency change listener.
370 * @clk_index: Index of the clock for which the change has occurred.
371 * @clk_rate_hz: Clock frequency(Hz).
372 *
373 * A callback invoked when the clock rate changes. The function must not
374 * sleep. No clock rate manager functions may be called from here, as
375 * its lock is held.
376 */
377 typedef void
378 kbase_clk_rate_listener_on_change_t(struct kbase_clk_rate_listener *listener,
379 u32 clk_index, u32 clk_rate_hz);
380
381 /**
382 * struct kbase_clk_rate_listener - Clock frequency listener
383 *
384 * @node: List node.
385 * @notify: Callback to be called when GPU frequency changes.
386 */
387 struct kbase_clk_rate_listener {
388 struct list_head node;
389 kbase_clk_rate_listener_on_change_t *notify;
390 };
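/*
 * Usage sketch (illustrative only): a client provides a callback matching
 * kbase_clk_rate_listener_on_change_t and embeds the listener in its own
 * state. The callback must not sleep and must not call back into the clock
 * rate trace manager (see the typedef documentation above).
 *
 *   static void example_on_rate_change(struct kbase_clk_rate_listener *listener,
 *                                      u32 clk_index, u32 clk_rate_hz)
 *   {
 *           // record clk_rate_hz for clk_index; no sleeping allowed here
 *   }
 *
 *   static struct kbase_clk_rate_listener example_listener = {
 *           .notify = example_on_rate_change,
 *   };
 *
 * example_on_rate_change() and example_listener are hypothetical; how the
 * listener is registered with the manager is not shown here.
 */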
391
392 /**
393 * struct kbase_clk_rate_trace_manager - Data stored per device for GPU clock
394 * rate trace manager.
395 *
396 * @gpu_idle: Tracks the idle state of GPU.
397 * @clks: Array of pointers to structures storing data for every
398 * enumerated GPU clock.
399 * @clk_rate_trace_ops: Pointer to the platform specific GPU clock rate trace
400 * operations.
401 * @listeners: List of listeners attached.
402 * @lock: Lock to serialize the actions of GPU clock rate trace
403 * manager.
404 */
405 struct kbase_clk_rate_trace_manager {
406 bool gpu_idle;
407 struct kbase_clk_data *clks[BASE_MAX_NR_CLOCKS_REGULATORS];
408 struct kbase_clk_rate_trace_op_conf *clk_rate_trace_ops;
409 struct list_head listeners;
410 spinlock_t lock;
411 };
412
413 /**
414 * struct kbase_pm_device_data - Data stored per device for power management.
415 * @lock: The lock protecting Power Management structures accessed
416 * outside of IRQ.
417 * This lock must also be held whenever the GPU is being
418 * powered on or off.
419 * @active_count: The reference count of active contexts on this device.
420 * Note that some code paths keep shaders/the tiler
421 * powered whilst this is 0.
422 * Use kbase_pm_is_active() instead to check for such cases.
423 * @suspending: Flag indicating suspending/suspended
424 * @runtime_active: Flag to track if the GPU is in runtime suspended or active
425 * state. This ensures that runtime_put and runtime_get
426 * functions are called in pairs. For example if runtime_get
427 * has already been called from the power_on callback, then
428 * the call to it from runtime_gpu_active callback can be
429 * skipped.
430 * @gpu_lost: Flag indicating that the GPU has been lost.
433 * @zero_active_count_wait: Wait queue set when active_count == 0
434 * @resume_wait: Wait queue used to wait for the system resume of the GPU device.
435 * @debug_core_mask: Bit masks identifying the available shader cores that are
436 * specified via sysfs. One mask per job slot.
437 * @debug_core_mask_all: Bit masks identifying the available shader cores that
438 * are specified via sysfs.
439 * @callback_power_runtime_init: Callback for initializing the runtime power
440 * management. Return 0 on success, else error code
441 * @callback_power_runtime_term: Callback for terminating the runtime power
442 * management.
443 * @dvfs_period: Time in milliseconds between each dvfs sample
444 * @backend: KBase PM backend data
445 * @arb_vm_state: The state of the arbiter VM machine
446 * @gpu_users_waiting: Used by virtualization to notify the arbiter that there
447 * are users waiting for the GPU so that it can request
448 * and resume the driver.
449 * @clk_rtm: The state of the GPU clock rate trace manager
 *
 * This structure contains data for the power management framework.
 * There is one instance of this structure per device in the system.
450 */
451 struct kbase_pm_device_data {
452 struct mutex lock;
453 int active_count;
454 bool suspending;
455 #if MALI_USE_CSF
456 bool runtime_active;
457 #endif
458 #ifdef CONFIG_MALI_ARBITER_SUPPORT
459 atomic_t gpu_lost;
460 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
461 wait_queue_head_t zero_active_count_wait;
462 wait_queue_head_t resume_wait;
463
464 #if MALI_USE_CSF
465 u64 debug_core_mask;
466 #else
467 /* One mask per job slot. */
468 u64 debug_core_mask[BASE_JM_MAX_NR_SLOTS];
469 u64 debug_core_mask_all;
470 #endif /* MALI_USE_CSF */
471
472 int (*callback_power_runtime_init)(struct kbase_device *kbdev);
473 void (*callback_power_runtime_term)(struct kbase_device *kbdev);
474 u32 dvfs_period;
475 struct kbase_pm_backend_data backend;
476 #ifdef CONFIG_MALI_ARBITER_SUPPORT
477 struct kbase_arbiter_vm_state *arb_vm_state;
478 atomic_t gpu_users_waiting;
479 #endif /* CONFIG_MALI_ARBITER_SUPPORT */
480 struct kbase_clk_rate_trace_manager clk_rtm;
481 };
482
483 /**
484 * struct kbase_mem_pool - Page based memory pool for kctx/kbdev
485 * @kbdev: Kbase device where memory is used
486 * @cur_size: Number of free pages currently in the pool (may exceed
487 * @max_size in some corner cases)
488 * @max_size: Maximum number of free pages in the pool
489 * @order: order = 0 refers to a pool of 4 KB pages
490 * order = 9 refers to a pool of 2 MB pages (2^9 * 4KB = 2 MB)
491 * @group_id: A memory group ID to be passed to a platform-specific
492 * memory group manager, if present. Immutable.
493 * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
494 * @pool_lock: Lock protecting the pool - must be held when modifying
495 * @cur_size and @page_list
496 * @page_list: List of free pages in the pool
497 * @reclaim: Shrinker for kernel reclaim of free pages
498 * @isolation_in_progress_cnt: Number of pages in pool undergoing page isolation.
499 * This is used to avoid race condition between pool termination
500 * and page isolation for page migration.
501 * @next_pool: Pointer to next pool where pages can be allocated when this
502 * pool is empty. Pages will spill over to the next pool when
503 * this pool is full. Can be NULL if there is no next pool.
504 * @dying: true if the pool is being terminated, and any ongoing
505 * operations should be abandoned
506 * @dont_reclaim: true if the shrinker is forbidden from reclaiming memory from
507 * this pool, eg during a grow operation
508 */
509 struct kbase_mem_pool {
510 struct kbase_device *kbdev;
511 size_t cur_size;
512 size_t max_size;
513 u8 order;
514 u8 group_id;
515 spinlock_t pool_lock;
516 struct list_head page_list;
517 struct shrinker reclaim;
518 atomic_t isolation_in_progress_cnt;
519
520 struct kbase_mem_pool *next_pool;
521
522 bool dying;
523 bool dont_reclaim;
524 };
525
526 /**
527 * struct kbase_mem_pool_group - a complete set of physical memory pools.
528 *
529 * @small: Array of objects containing the state for pools of 4 KiB size
530 * physical pages.
531 * @large: Array of objects containing the state for pools of 2 MiB size
532 * physical pages.
533 *
534 * Memory pools are used to allow efficient reallocation of previously-freed
535 * physical pages. A pair of memory pools is initialized for each physical
536 * memory group: one for 4 KiB pages and one for 2 MiB pages. These arrays
537 * should be indexed by physical memory group ID, the meaning of which is
538 * defined by the systems integrator.
539 */
540 struct kbase_mem_pool_group {
541 struct kbase_mem_pool small[MEMORY_GROUP_MANAGER_NR_GROUPS];
542 struct kbase_mem_pool large[MEMORY_GROUP_MANAGER_NR_GROUPS];
543 };
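/*
 * Indexing sketch (illustrative only): both arrays are indexed by the
 * physical memory group ID, as described above.
 *
 *   // grp is assumed to be in 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS - 1
 *   struct kbase_mem_pool *pool_4k = &group->small[grp];
 *   struct kbase_mem_pool *pool_2m = &group->large[grp];
 */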
544
545 /**
546 * struct kbase_mem_pool_config - Initial configuration for a physical memory
547 * pool
548 *
549 * @max_size: Maximum number of free pages that the pool can hold.
550 */
551 struct kbase_mem_pool_config {
552 size_t max_size;
553 };
554
555 /**
556 * struct kbase_mem_pool_group_config - Initial configuration for a complete
557 * set of physical memory pools
558 *
559 * @small: Array of initial configuration for pools of 4 KiB pages.
560 * @large: Array of initial configuration for pools of 2 MiB pages.
561 *
562 * This array should be indexed by physical memory group ID, the meaning
563 * of which is defined by the systems integrator.
564 */
565 struct kbase_mem_pool_group_config {
566 struct kbase_mem_pool_config small[MEMORY_GROUP_MANAGER_NR_GROUPS];
567 struct kbase_mem_pool_config large[MEMORY_GROUP_MANAGER_NR_GROUPS];
568 };
569
570 /**
571 * struct kbase_devfreq_opp - Lookup table for converting between nominal OPP
572 * frequency, real frequencies and core mask
573 * @real_freqs: Real GPU frequencies.
574 * @opp_volts: OPP voltages.
575 * @opp_freq: Nominal OPP frequency
576 * @core_mask: Shader core mask
577 */
578 struct kbase_devfreq_opp {
579 u64 opp_freq;
580 u64 core_mask;
581 u64 real_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
582 u32 opp_volts[BASE_MAX_NR_CLOCKS_REGULATORS];
583 };
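/*
 * Lookup sketch (illustrative only): the kbase_device documentation later in
 * this file describes translating a nominal OPP frequency into real
 * frequencies and a core mask via this table. A linear search over the table
 * held in struct kbase_device would look roughly like:
 *
 *   const struct kbase_devfreq_opp *opp = NULL;
 *   int i;
 *
 *   for (i = 0; i < kbdev->num_opps; i++) {
 *           if (kbdev->devfreq_table[i].opp_freq == nominal_freq) {
 *                   opp = &kbdev->devfreq_table[i];
 *                   break;
 *           }
 *   }
 *
 * nominal_freq is a hypothetical input; the real driver obtains it through
 * devfreq_recommended_opp() as noted in the kbase_device documentation.
 */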
584
585 /* MMU mode flags */
586 #define KBASE_MMU_MODE_HAS_NON_CACHEABLE (1ul << 0) /* Has NON_CACHEABLE MEMATTR */
587
588 /**
589 * struct kbase_mmu_mode - object containing pointer to methods invoked for
590 * programming the MMU, as per the MMU mode supported
591 * by Hw.
592 * @update: enable & setup/configure one of the GPU address spaces.
593 * @get_as_setup: retrieve the configuration of one of the GPU address spaces.
594 * @disable_as: disable one of the GPU address spaces.
595 * @pte_to_phy_addr: retrieve the physical address encoded in the page table entry.
596 * @ate_is_valid: check if the pte is a valid address translation entry
597 * encoding the physical address of the actual mapped page.
598 * @pte_is_valid: check if the pte is a valid entry encoding the physical
599 * address of the next lower level page table.
600 * @entry_set_ate: program the pte to be a valid address translation entry to
601 * encode the physical address of the actual page being mapped.
602 * @entry_set_pte: program the pte to be a valid entry to encode the physical
603 * address of the next lower level page table and also update
604 * the number of valid entries.
605 * @entries_invalidate: clear out or invalidate a range of ptes.
606 * @get_num_valid_entries: returns the number of valid entries for a specific pgd.
607 * @set_num_valid_entries: sets the number of valid entries for a specific pgd
608 * @flags: bitmask of MMU mode flags. Refer to KBASE_MMU_MODE_ constants.
609 */
610 struct kbase_mmu_mode {
611 void (*update)(struct kbase_device *kbdev,
612 struct kbase_mmu_table *mmut,
613 int as_nr);
614 void (*get_as_setup)(struct kbase_mmu_table *mmut,
615 struct kbase_mmu_setup * const setup);
616 void (*disable_as)(struct kbase_device *kbdev, int as_nr);
617 phys_addr_t (*pte_to_phy_addr)(u64 entry);
618 int (*ate_is_valid)(u64 ate, int level);
619 int (*pte_is_valid)(u64 pte, int level);
620 void (*entry_set_ate)(u64 *entry, struct tagged_addr phy,
621 unsigned long flags, int level);
622 void (*entry_set_pte)(u64 *entry, phys_addr_t phy);
623 void (*entries_invalidate)(u64 *entry, u32 count);
624 unsigned int (*get_num_valid_entries)(u64 *pgd);
625 void (*set_num_valid_entries)(u64 *pgd,
626 unsigned int num_of_valid_entries);
627 unsigned long flags;
628 };
629
630 struct kbase_mmu_mode const *kbase_mmu_mode_get_aarch64(void);
631
632 #define DEVNAME_SIZE 16
633
634 /**
635 * enum kbase_devfreq_work_type - The type of work to perform in the devfreq
636 * suspend/resume worker.
637 * @DEVFREQ_WORK_NONE: Initialisation state.
638 * @DEVFREQ_WORK_SUSPEND: Call devfreq_suspend_device().
639 * @DEVFREQ_WORK_RESUME: Call devfreq_resume_device().
640 */
641 enum kbase_devfreq_work_type {
642 DEVFREQ_WORK_NONE,
643 DEVFREQ_WORK_SUSPEND,
644 DEVFREQ_WORK_RESUME
645 };
646
647 /**
648 * struct kbase_devfreq_queue_info - Object representing an instance for managing
649 * the queued devfreq suspend/resume works.
650 * @workq: Workqueue for devfreq suspend/resume requests
651 * @work: Work item for devfreq suspend & resume
652 * @req_type: Requested work type to be performed by the devfreq
653 * suspend/resume worker
654 * @acted_type: Work type that has been acted on by the worker, i.e. the
655 * internally recorded state of the suspend/resume
656 */
657 struct kbase_devfreq_queue_info {
658 struct workqueue_struct *workq;
659 struct work_struct work;
660 enum kbase_devfreq_work_type req_type;
661 enum kbase_devfreq_work_type acted_type;
662 };
663
664 /**
665 * struct kbase_process - Representing an object of a kbase process instantiated
666 * when the first kbase context is created under it.
667 * @tgid: Thread group ID.
668 * @total_gpu_pages: Total gpu pages allocated across all the contexts
669 * of this process, it accounts for both native allocations
670 * and dma_buf imported allocations.
671 * @kctx_list: List of kbase contexts created for the process.
672 * @kprcs_node: Node to a rb_tree, kbase_device will maintain a rb_tree
673 * based on key tgid, kprcs_node is the node link to
674 * &struct_kbase_device.process_root.
675 * @dma_buf_root: RB tree of the dma-buf imported allocations, imported
676 * across all the contexts created for this process.
677 * Used to ensure that pages of allocation are accounted
678 * only once for the process, even if the allocation gets
679 * imported multiple times for the process.
680 */
681 struct kbase_process {
682 pid_t tgid;
683 size_t total_gpu_pages;
684 struct list_head kctx_list;
685
686 struct rb_node kprcs_node;
687 struct rb_root dma_buf_root;
688 };
689
690 /**
691 * struct kbase_mem_migrate - Object representing an instance for managing
692 * page migration.
693 *
694 * @free_pages_list: List of deferred pages to free. Mostly used when page migration
695 * is enabled. Pages in the memory pool that require migrating
696 * will be freed instead. However, a page cannot be freed
697 * right away because Linux first needs to release the page lock.
698 * Therefore the page is added to this list and freed later.
699 * @free_pages_lock: This lock should be held when adding or removing pages
700 * from @free_pages_list.
701 * @free_pages_workq: Work queue to process the work items queued to free
702 * pages in @free_pages_list.
703 * @free_pages_work: Work item to free pages in @free_pages_list.
704 * @inode: Pointer to inode whose address space operations are used
705 * for page migration purposes.
706 */
707 struct kbase_mem_migrate {
708 struct list_head free_pages_list;
709 spinlock_t free_pages_lock;
710 struct workqueue_struct *free_pages_workq;
711 struct work_struct free_pages_work;
712 #if (KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE)
713 struct inode *inode;
714 #endif
715 };
716
717 /**
718 * struct kbase_device - Object representing an instance of GPU platform device,
719 * allocated from the probe method of mali driver.
720 * @hw_quirks_sc: Configuration to be used for the shader cores as per
721 * the HW issues present in the GPU.
722 * @hw_quirks_tiler: Configuration to be used for the Tiler as per the HW
723 * issues present in the GPU.
724 * @hw_quirks_mmu: Configuration to be used for the MMU as per the HW
725 * issues present in the GPU.
726 * @hw_quirks_gpu: Configuration to be used for the Job Manager or CSF/MCU
727 * subsystems as per the HW issues present in the GPU.
728 * @entry: Links the device instance to the global list of GPU
729 * devices. The list would have as many entries as there
730 * are GPU device instances.
731 * @dev: Pointer to the kernel's generic/base representation
732 * of the GPU platform device.
733 * @mdev: Pointer to the miscellaneous device registered to
734 * provide Userspace access to kernel driver through the
735 * device file /dev/malixx.
736 * @reg_start: Base address of the region in physical address space
737 * where GPU registers have been mapped.
738 * @reg_size: Size of the region containing GPU registers
739 * @reg: Kernel virtual address of the region containing GPU
740 * registers, using which Driver will access the registers.
741 * @irqs: Array containing IRQ resource info for 3 types of
742 * interrupts : Job scheduling, MMU & GPU events (like
743 * power management, cache etc.)
744 * @irqs.irq: irq number
745 * @irqs.flags: irq flags
746 * @clocks: Pointer to the input clock resources referenced by
747 * the GPU device node.
748 * @scmi_clk: Pointer to the input scmi clock resources
749 * @nr_clocks: Number of clocks set in the clocks array.
750 * @regulators: Pointer to the structs corresponding to the
751 * regulators referenced by the GPU device node.
752 * @nr_regulators: Number of regulators set in the regulators array.
753 * @opp_table: Pointer to the device OPP structure maintaining the
754 * link to OPPs attached to a device. This is obtained
755 * after setting regulator names for the device.
756 * @token: Integer replacement for opp_table in kernel versions
757 * 6 and greater. Value is a token id number when 0 or greater,
758 * and a linux errno when negative. Must be initialised
759 * to a non-zero value, as 0 is a valid token id.
760 * @devname: string containing the name used for GPU device instance,
761 * miscellaneous device is registered using the same name.
762 * @id: Unique identifier for the device, indicates the number of
763 * devices which have been created so far.
764 * @model: Pointer, valid only when Driver is compiled to not access
765 * the real GPU Hw, to the dummy model which tries to mimic
766 * to some extent the state & behavior of GPU Hw in response
767 * to the register accesses made by the Driver.
768 * @irq_slab: slab cache for allocating the work items queued when
769 * model mimics raising of IRQ to cause an interrupt on CPU.
770 * @irq_workq: workqueue for processing the irq work items.
771 * @serving_job_irq: function to execute work items queued when model mimics
772 * the raising of JS irq, mimics the interrupt handler
773 * processing JS interrupts.
774 * @serving_gpu_irq: function to execute work items queued when model mimics
775 * the raising of GPU irq, mimics the interrupt handler
776 * processing GPU interrupts.
777 * @serving_mmu_irq: function to execute work items queued when model mimics
778 * the raising of MMU irq, mimics the interrupt handler
779 * processing MMU interrupts.
780 * @reg_op_lock: lock used by model to serialize the handling of register
781 * accesses made by the driver.
782 * @pm: Per device object for storing data for power management
783 * framework.
784 * @fw_load_lock: Mutex to protect firmware loading in @ref kbase_open.
785 * @csf: CSF object for the GPU device.
786 * @js_data: Per device object encapsulating the current context of
787 * Job Scheduler, which is global to the device and is not
788 * tied to any particular struct kbase_context running on
789 * the device
790 * @mem_pools: Global pools of free physical memory pages which can
791 * be used by all the contexts.
792 * @memdev: keeps track of the in use physical pages allocated by
793 * the Driver.
794 * @mmu_mode: Pointer to the object containing methods for programming
795 * the MMU, depending on the type of MMU supported by Hw.
796 * @mgm_dev: Pointer to the memory group manager device attached
797 * to the GPU device. This points to an internal memory
798 * group manager if no platform-specific memory group
799 * manager was retrieved through device tree.
800 * @as: Array of objects representing address spaces of GPU.
801 * @as_free: Bitpattern of free/available GPU address spaces.
802 * @as_to_kctx: Array of pointers to struct kbase_context, having
803 * GPU address spaces assigned to them.
804 * @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
805 * register used in the handling of Bus & Page faults.
806 * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
807 * supported and used where possible.
808 * @gpu_props: Object containing complete information about the
809 * configuration/properties of GPU HW device in use.
810 * @hw_issues_mask: List of SW workarounds for HW issues
811 * @hw_features_mask: List of available HW features.
812 * @disjoint_event: struct for keeping track of the disjoint information,
813 * that whether the GPU is in a disjoint state and the
814 * number of disjoint events that have occurred on GPU.
815 * @disjoint_event.count: disjoint event count
816 * @disjoint_event.state: disjoint event state
817 * @nr_hw_address_spaces: Number of address spaces actually available in the
818 * GPU, remains constant after driver initialisation.
819 * @nr_user_address_spaces: Number of address spaces available to user contexts
820 * @hwcnt_backend_csf_if_fw: Firmware interface to access CSF GPU performance
821 * counters.
822 * @hwcnt: Structure used for instrumentation and HW counters
823 * dumping
824 * @hwcnt.lock: The lock should be used when accessing any of the
825 * following members
826 * @hwcnt.kctx: kbase context
827 * @hwcnt.addr: HW counter address
828 * @hwcnt.addr_bytes: HW counter size in bytes
829 * @hwcnt.backend: Kbase instrumentation backend
830 * @hwcnt_gpu_jm_backend: Job manager GPU backend interface, used as superclass reference
831 * pointer by hwcnt_gpu_iface, which wraps this implementation in
832 * order to extend it with periodic dumping functionality.
833 * @hwcnt_gpu_iface: Backend interface for GPU hardware counter access.
834 * @hwcnt_watchdog_timer: Watchdog interface, used by the GPU backend hwcnt_gpu_iface to
835 * perform periodic dumps in order to prevent hardware counter value
836 * overflow or saturation.
837 * @hwcnt_gpu_ctx: Context for GPU hardware counter access.
838 * @hwaccess_lock must be held when calling
839 * kbase_hwcnt_context_enable() with @hwcnt_gpu_ctx.
840 * @hwcnt_gpu_virt: Virtualizer for GPU hardware counters.
841 * @vinstr_ctx: vinstr context created per device.
842 * @kinstr_prfcnt_ctx: kinstr_prfcnt context created per device.
843 * @timeline_flags: Bitmask defining which sets of timeline tracepoints
844 * are enabled. If zero, there is no timeline client and
845 * therefore timeline is disabled.
846 * @timeline: Timeline context created per device.
847 * @ktrace: kbase device's ktrace
848 * @reset_timeout_ms: Number of milliseconds to wait for the soft stop to
849 * complete for the GPU jobs before proceeding with the
850 * GPU reset.
851 * @lowest_gpu_freq_khz: Lowest frequency in KHz that the GPU can run at. Used
852 * to calculate suitable timeouts for wait operations.
853 * @backend_time: Kbase backend time related attributes.
854 * @cache_clean_in_progress: Set when a cache clean has been started, and
855 * cleared when it has finished. This prevents multiple
856 * cache cleans being done simultaneously.
857 * @cache_clean_queued: Pending cache clean operations invoked while another is
858 * in progress. If this is not 0, another cache clean needs
859 * to be triggered immediately after completion of the
860 * current one.
861 * @cache_clean_wait: Signalled when a cache clean has finished.
862 * @platform_context: Platform specific private data to be accessed by
863 * platform specific config files only.
864 * @kctx_list: List of kbase_contexts created for the device,
865 * including any contexts that might be created for
866 * hardware counters.
867 * @kctx_list_lock: Lock protecting concurrent accesses to @kctx_list.
868 * @devfreq_profile: Describes devfreq profile for the Mali GPU device, passed
869 * to devfreq_add_device() to add devfreq feature to Mali
870 * GPU device.
871 * @devfreq: Pointer to devfreq structure for Mali GPU device,
872 * returned on the call to devfreq_add_device().
873 * @current_freqs: The real frequencies, corresponding to
874 * @current_nominal_freq, at which the Mali GPU device
875 * is currently operating, as retrieved from
876 * @devfreq_table in the target callback of
877 * @devfreq_profile.
878 * @current_nominal_freq: The nominal frequency currently used for the Mali GPU
879 * device as retrieved through devfreq_recommended_opp()
880 * using the freq value passed as an argument to target
881 * callback of @devfreq_profile
882 * @current_voltages: The voltages corresponding to @current_nominal_freq,
883 * as retrieved from @devfreq_table in the target
884 * callback of @devfreq_profile.
885 * @current_core_mask: bitmask of shader cores that are currently desired &
886 * enabled, corresponding to @current_nominal_freq as
887 * retrieved from @devfreq_table in the target callback
888 * of @devfreq_profile.
889 * @devfreq_table: Pointer to the lookup table for converting between
890 * nominal OPP (operating performance point) frequency,
891 * and real frequency and core mask. This table is
892 * constructed according to operating-points-v2-mali
893 * table in devicetree.
894 * @num_opps: Number of operating performance points available for the Mali
895 * GPU device.
896 * @last_devfreq_metrics: last PM metrics
897 * @devfreq_queue: Per device object for storing data that manages devfreq
898 * suspend & resume request queue and the related items.
899 * @devfreq_cooling: Pointer returned on registering devfreq cooling device
900 * corresponding to @devfreq.
901 * @ipa_protection_mode_switched: is set to TRUE when GPU is put into protected
902 * mode. It is a sticky flag which is cleared by IPA
903 * once it has made use of information that GPU had
904 * previously entered protected mode.
905 * @ipa: Top level structure for IPA, containing pointers to both
906 * configured & fallback models.
907 * @ipa.lock: Access to this struct must be with ipa.lock held
908 * @ipa.configured_model: ipa model to use
909 * @ipa.fallback_model: ipa fallback model
910 * @ipa.last_metrics: Values of the PM utilization metrics from last time
911 * the power model was invoked. The utilization is
912 * calculated as the difference between last_metrics
913 * and the current values.
914 * @ipa.force_fallback_model: true if use of fallback model has been forced by
915 * the User
916 * @ipa.last_sample_time: Records the time when counters, used for dynamic
917 * energy estimation, were last sampled.
918 * @previous_frequency: Previous frequency of GPU clock used for
919 * BASE_HW_ISSUE_GPU2017_1336 workaround, This clock is
920 * restored when L2 is powered on.
921 * @job_fault_debug: Flag to control the dumping of debug data for job faults,
922 * set when the 'job_fault' debugfs file is opened.
923 * @mali_debugfs_directory: Root directory for the debugfs files created by the driver
924 * @debugfs_ctx_directory: Directory inside the @mali_debugfs_directory containing
925 * a sub-directory for every context.
926 * @debugfs_instr_directory: Instrumentation debugfs directory
927 * @debugfs_as_read_bitmap: bitmap of address spaces for which the bus or page fault
928 * has occurred.
929 * @job_fault_wq: Waitqueue to block the job fault dumping daemon till the
930 * occurrence of a job fault.
931 * @job_fault_resume_wq: Waitqueue on which every context with a faulty job wait
932 * for the job fault dumping to complete before they can
933 * do bottom half of job done for the atoms which followed
934 * the faulty atom.
935 * @job_fault_resume_workq: workqueue to process the work items queued for the faulty
936 * atoms, whereby the work item function waits for the dumping
937 * to get completed.
938 * @job_fault_event_list: List of atoms, each belonging to a different context, which
939 * generated a job fault.
940 * @job_fault_event_lock: Lock to protect concurrent accesses to @job_fault_event_list
941 * @regs_dump_debugfs_data: Contains the offset of register to be read through debugfs
942 * file "read_register".
943 * @regs_dump_debugfs_data.reg_offset: Contains the offset of register to be
944 * read through debugfs file "read_register".
945 * @ctx_num: Total number of contexts created for the device.
946 * @io_history: Pointer to an object keeping a track of all recent
947 * register accesses. The history of register accesses
948 * can be read through "regs_history" debugfs file.
949 * @hwaccess: Contains a pointer to active kbase context and GPU
950 * backend specific data for HW access layer.
951 * @faults_pending: Count of page/bus faults waiting for bottom half processing
952 * via workqueues.
953 * @mmu_hw_operation_in_progress: Set before sending the MMU command and is
954 * cleared after the command is complete. Whilst this
955 * flag is set, the write to L2_PWROFF register will be
956 * skipped which is needed to workaround the HW issue
957 * GPU2019-3878. PM state machine is invoked after
958 * clearing this flag and @hwaccess_lock is used to
959 * serialize the access.
960 * @mmu_page_migrate_in_progress: Set before starting a MMU page migration transaction
961 * and cleared after the transaction completes. PM L2 state is
962 * prevented from entering powering up/down transitions when the
963 * flag is set, @hwaccess_lock is used to serialize the access.
964 * @poweroff_pending: Set when power off operation for GPU is started, reset when
965 * power on for GPU is started.
966 * @infinite_cache_active_default: Set to enable using infinite cache for all the
967 * allocations of a new context.
968 * @mem_pool_defaults: Default configuration for the group of memory pools
969 * created for a new context.
970 * @current_gpu_coherency_mode: coherency mode in use, which can be different
971 * from @system_coherency, when using protected mode.
972 * @system_coherency: coherency mode as retrieved from the device tree.
973 * @cci_snoop_enabled: Flag to track when CCI snoops have been enabled.
974 * @snoop_enable_smc: SMC function ID to call into Trusted firmware to
975 * enable cache snooping. Value of 0 indicates that it
976 * is not used.
977 * @snoop_disable_smc: SMC function ID to call disable cache snooping.
978 * @protected_ops: Pointer to the methods for switching in or out of the
979 * protected mode, as per the @protected_dev being used.
980 * @protected_dev: Pointer to the protected mode switcher device attached
981 * to the GPU device retrieved through device tree if
982 * GPU does not support protected mode switching natively.
983 * @protected_mode: set to TRUE when GPU is put into protected mode
984 * @protected_mode_transition: set to TRUE when GPU is transitioning into or
985 * out of protected mode.
986 * @protected_mode_hwcnt_desired: True if we want GPU hardware counters to be
987 * enabled. Counters must be disabled before transition
988 * into protected mode.
989 * @protected_mode_hwcnt_disabled: True if GPU hardware counters are not
990 * enabled.
991 * @protected_mode_hwcnt_disable_work: Work item to disable GPU hardware
992 * counters, used if atomic disable is not possible.
993 * @irq_reset_flush: Flag to indicate that GPU reset is in-flight and flush of
994 * IRQ + bottom half is being done, to prevent the writes
995 * to MMU_IRQ_CLEAR & MMU_IRQ_MASK registers.
996 * @inited_subsys: Bitmap of inited sub systems at the time of device probe.
997 * Used during device remove or for handling error in probe.
998 * @hwaccess_lock: Lock, which can be taken from IRQ context, to serialize
999 * the updates made to Job dispatcher + scheduler states.
1000 * @mmu_hw_mutex: Protects access to MMU operations and address space
1001 * related state.
1002 * @serialize_jobs: Currently used mode for serialization of jobs, both
1003 * intra & inter slots serialization is supported.
1004 * @backup_serialize_jobs: Copy of the original value of @serialize_jobs taken
1005 * when GWT is enabled. Used to restore the original value
1006 * on disabling of GWT.
1007 * @js_ctx_scheduling_mode: Context scheduling mode currently being used by
1008 * Job Scheduler
1009 * @l2_size_override: Used to set L2 cache size via device tree blob
1010 * @l2_hash_override: Used to set L2 cache hash via device tree blob
1011 * @l2_hash_values_override: true if @l2_hash_values is valid.
1012 * @l2_hash_values: Used to set L2 asn_hash via device tree blob
1013 * @sysc_alloc: Array containing values to be programmed into
1014 * SYSC_ALLOC[0..7] GPU registers on L2 cache
1015 * power down. These come from either DTB or
1016 * via DebugFS (if it is available in kernel).
1017 * @process_root: rb_tree root node for maintaining a rb_tree of
1018 * kbase_process based on key tgid(thread group ID).
1019 * @dma_buf_root: rb_tree root node for maintaining a rb_tree of
1020 * &struct kbase_dma_buf based on key dma_buf.
1021 * We maintain a rb_tree of dma_buf mappings under
1022 * kbase_device and kbase_process, one indicates a
1023 * mapping and gpu memory usage at device level and
1024 * other one at process level.
1025 * @total_gpu_pages: Total GPU pages used for the complete GPU device.
1026 * @dma_buf_lock: This mutex should be held while accounting for
1027 * @total_gpu_pages from imported dma buffers.
1028 * @gpu_mem_usage_lock: This spinlock should be held while accounting
1029 * @total_gpu_pages for both native and dma-buf imported
1030 * allocations.
1031 * @dummy_job_wa: struct for dummy job execution workaround for the
1032 * GPU hang issue
1033 * @dummy_job_wa.ctx: dummy job workaround context
1034 * @dummy_job_wa.jc: dummy job workaround job
1035 * @dummy_job_wa.slot: dummy job workaround slot
1036 * @dummy_job_wa.flags: dummy job workaround flags
1037 * @dummy_job_wa_loaded: Flag for indicating that the workaround blob has
1038 * been loaded. Protected by @fw_load_lock.
1039 * @arb: Pointer to the arbiter device
1040 * @pcm_dev: The priority control manager device.
1041 * @oom_notifier_block: notifier_block containing kernel-registered out-of-
1042 * memory handler.
1043 * @mem_migrate: Per device object for managing page migration.
1044 * @live_fence_metadata: Count of live fence metadata structures created by
1045 * KCPU queue. These structures may outlive kbase module
1046 * itself. Therefore, in such a case, a warning should be
1047 * produced.
1048 * @mmu_as_inactive_wait_time_ms: Maximum waiting time in ms for the completion of
1049 * a MMU operation
1050 * @va_region_slab: kmem_cache (slab) for allocated kbase_va_region structures.
1051 */
1052 struct kbase_device {
1053 u32 hw_quirks_sc;
1054 u32 hw_quirks_tiler;
1055 u32 hw_quirks_mmu;
1056 u32 hw_quirks_gpu;
1057
1058 struct list_head entry;
1059 struct device *dev;
1060 struct miscdevice mdev;
1061 u64 reg_start;
1062 size_t reg_size;
1063 void __iomem *reg;
1064
1065 struct {
1066 int irq;
1067 int flags;
1068 } irqs[3];
1069
1070 struct clk *clocks[BASE_MAX_NR_CLOCKS_REGULATORS];
1071 unsigned int nr_clocks;
1072 #if IS_ENABLED(CONFIG_REGULATOR)
1073 struct regulator *regulators[BASE_MAX_NR_CLOCKS_REGULATORS];
1074 unsigned int nr_regulators;
1075 #if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
1076 int token;
1077 #elif (KERNEL_VERSION(4, 10, 0) <= LINUX_VERSION_CODE)
1078 struct opp_table *opp_table;
1079 #endif /* (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE) */
1080 #endif /* CONFIG_REGULATOR */
1081 char devname[DEVNAME_SIZE];
1082 u32 id;
1083
1084 #if !IS_ENABLED(CONFIG_MALI_REAL_HW)
1085 void *model;
1086 struct kmem_cache *irq_slab;
1087 struct workqueue_struct *irq_workq;
1088 atomic_t serving_job_irq;
1089 atomic_t serving_gpu_irq;
1090 atomic_t serving_mmu_irq;
1091 spinlock_t reg_op_lock;
1092 #endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
1093 struct kbase_pm_device_data pm;
1094
1095 struct kbase_mem_pool_group mem_pools;
1096 struct kbasep_mem_device memdev;
1097 struct kbase_mmu_mode const *mmu_mode;
1098
1099 struct memory_group_manager_device *mgm_dev;
1100
1101 struct kbase_as as[BASE_MAX_NR_AS];
1102 u16 as_free;
1103 struct kbase_context *as_to_kctx[BASE_MAX_NR_AS];
1104
1105 spinlock_t mmu_mask_change;
1106
1107 bool pagesize_2mb;
1108
1109 struct kbase_gpu_props gpu_props;
1110
1111 unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
1112 unsigned long hw_features_mask[(BASE_HW_FEATURE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
1113
1114 struct {
1115 atomic_t count;
1116 atomic_t state;
1117 } disjoint_event;
1118
1119 s8 nr_hw_address_spaces;
1120 s8 nr_user_address_spaces;
1121
1122 /**
1123 * @pbha_propagate_bits: Record of Page-Based Hardware Attribute Propagate bits to
1124 * restore to L2_CONFIG upon GPU reset.
1125 */
1126 u8 pbha_propagate_bits;
1127
1128 #if MALI_USE_CSF
1129 struct kbase_hwcnt_backend_csf_if hwcnt_backend_csf_if_fw;
1130 #else
1131 struct kbase_hwcnt {
1132 spinlock_t lock;
1133
1134 struct kbase_context *kctx;
1135 u64 addr;
1136 u64 addr_bytes;
1137
1138 struct kbase_instr_backend backend;
1139 } hwcnt;
1140
1141 struct kbase_hwcnt_backend_interface hwcnt_gpu_jm_backend;
1142 #endif
1143
1144 struct kbase_hwcnt_backend_interface hwcnt_gpu_iface;
1145 struct kbase_hwcnt_watchdog_interface hwcnt_watchdog_timer;
1146
1147 struct kbase_hwcnt_context *hwcnt_gpu_ctx;
1148 struct kbase_hwcnt_virtualizer *hwcnt_gpu_virt;
1149 struct kbase_vinstr_context *vinstr_ctx;
1150 struct kbase_kinstr_prfcnt_context *kinstr_prfcnt_ctx;
1151
1152 atomic_t timeline_flags;
1153 struct kbase_timeline *timeline;
1154
1155 #if KBASE_KTRACE_TARGET_RBUF
1156 struct kbase_ktrace ktrace;
1157 #endif
1158 u32 reset_timeout_ms;
1159
1160 u64 lowest_gpu_freq_khz;
1161
1162 #if MALI_USE_CSF
1163 struct kbase_backend_time backend_time;
1164 #endif
1165
1166 bool cache_clean_in_progress;
1167 u32 cache_clean_queued;
1168 wait_queue_head_t cache_clean_wait;
1169
1170 void *platform_context;
1171
1172 struct list_head kctx_list;
1173 struct mutex kctx_list_lock;
1174
1175 struct rockchip_opp_info opp_info;
1176 bool is_runtime_resumed;
1177 unsigned long current_nominal_freq;
1178 struct monitor_dev_info *mdev_info;
1179 #ifdef CONFIG_MALI_BIFROST_DEVFREQ
1180 struct devfreq_dev_profile devfreq_profile;
1181 struct devfreq *devfreq;
1182 unsigned long current_freqs[BASE_MAX_NR_CLOCKS_REGULATORS];
1183 unsigned long current_voltages[BASE_MAX_NR_CLOCKS_REGULATORS];
1184 u64 current_core_mask;
1185 struct kbase_devfreq_opp *devfreq_table;
1186 int num_opps;
1187 struct kbasep_pm_metrics last_devfreq_metrics;
1188 struct ipa_power_model_data *model_data;
1189 struct kbase_devfreq_queue_info devfreq_queue;
1190
1191 #if IS_ENABLED(CONFIG_DEVFREQ_THERMAL)
1192 struct devfreq_cooling_power dfc_power;
1193 struct thermal_cooling_device *devfreq_cooling;
1194 bool ipa_protection_mode_switched;
1195 struct {
1196 /* Access to this struct must be with ipa.lock held */
1197 struct mutex lock;
1198 struct kbase_ipa_model *configured_model;
1199 struct kbase_ipa_model *fallback_model;
1200
1201 /* Values of the PM utilization metrics from last time the
1202 * power model was invoked. The utilization is calculated as
1203 * the difference between last_metrics and the current values.
1204 */
1205 struct kbasep_pm_metrics last_metrics;
1206
1207 /* true if use of fallback model has been forced by the User */
1208 bool force_fallback_model;
1209 /* Records the time when counters, used for dynamic energy
1210 * estimation, were last sampled.
1211 */
1212 ktime_t last_sample_time;
1213 } ipa;
1214 #endif /* CONFIG_DEVFREQ_THERMAL */
1215 #endif /* CONFIG_MALI_BIFROST_DEVFREQ */
1216 unsigned long previous_frequency;
1217
1218 #if !MALI_USE_CSF
1219 atomic_t job_fault_debug;
1220 #endif /* !MALI_USE_CSF */
1221
1222 #if IS_ENABLED(CONFIG_DEBUG_FS)
1223 struct dentry *mali_debugfs_directory;
1224 struct dentry *debugfs_ctx_directory;
1225 struct dentry *debugfs_instr_directory;
1226
1227 #ifdef CONFIG_MALI_BIFROST_DEBUG
1228 u64 debugfs_as_read_bitmap;
1229 #endif /* CONFIG_MALI_BIFROST_DEBUG */
1230
1231 #if !MALI_USE_CSF
1232 wait_queue_head_t job_fault_wq;
1233 wait_queue_head_t job_fault_resume_wq;
1234 struct workqueue_struct *job_fault_resume_workq;
1235 struct list_head job_fault_event_list;
1236 spinlock_t job_fault_event_lock;
1237 #endif /* !MALI_USE_CSF */
1238
1239 #if !MALI_CUSTOMER_RELEASE
1240 struct {
1241 u32 reg_offset;
1242 } regs_dump_debugfs_data;
1243 #endif /* !MALI_CUSTOMER_RELEASE */
1244 #endif /* CONFIG_DEBUG_FS */
1245
1246 atomic_t ctx_num;
1247
1248 #if IS_ENABLED(CONFIG_DEBUG_FS)
1249 struct kbase_io_history io_history;
1250 #endif /* CONFIG_DEBUG_FS */
1251
1252 struct kbase_hwaccess_data hwaccess;
1253
1254 atomic_t faults_pending;
1255
1256 #if MALI_USE_CSF
1257 bool mmu_hw_operation_in_progress;
1258 #endif
1259 bool mmu_page_migrate_in_progress;
1260 bool poweroff_pending;
1261
1262 bool infinite_cache_active_default;
1263
1264 struct kbase_mem_pool_group_config mem_pool_defaults;
1265
1266 u32 current_gpu_coherency_mode;
1267 u32 system_coherency;
1268
1269 bool cci_snoop_enabled;
1270
1271 u32 snoop_enable_smc;
1272 u32 snoop_disable_smc;
1273
1274 const struct protected_mode_ops *protected_ops;
1275
1276 struct protected_mode_device *protected_dev;
1277
1278 bool protected_mode;
1279
1280 bool protected_mode_transition;
1281
1282 bool protected_mode_hwcnt_desired;
1283
1284 bool protected_mode_hwcnt_disabled;
1285
1286 struct work_struct protected_mode_hwcnt_disable_work;
1287
1288
1289 bool irq_reset_flush;
1290
1291 u32 inited_subsys;
1292
1293 spinlock_t hwaccess_lock;
1294
1295 struct mutex mmu_hw_mutex;
1296
1297 u8 l2_size_override;
1298 u8 l2_hash_override;
1299 bool l2_hash_values_override;
1300 u32 l2_hash_values[ASN_HASH_COUNT];
1301
1302 u32 sysc_alloc[SYSC_ALLOC_COUNT];
1303
1304 struct mutex fw_load_lock;
1305 #if MALI_USE_CSF
1306 /* CSF object for the GPU device. */
1307 struct kbase_csf_device csf;
1308 #else
1309 struct kbasep_js_device_data js_data;
1310
1311 /* See KBASE_JS_*_PRIORITY_MODE for details. */
1312 u32 js_ctx_scheduling_mode;
1313
1314 /* See KBASE_SERIALIZE_* for details */
1315 u8 serialize_jobs;
1316
1317 #ifdef CONFIG_MALI_CINSTR_GWT
1318 u8 backup_serialize_jobs;
1319 #endif /* CONFIG_MALI_CINSTR_GWT */
1320
1321 #endif /* MALI_USE_CSF */
1322
1323 struct rb_root process_root;
1324 struct rb_root dma_buf_root;
1325
1326 size_t total_gpu_pages;
1327 struct mutex dma_buf_lock;
1328 spinlock_t gpu_mem_usage_lock;
1329
1330 struct {
1331 struct kbase_context *ctx;
1332 u64 jc;
1333 int slot;
1334 u64 flags;
1335 } dummy_job_wa;
1336 bool dummy_job_wa_loaded;
1337
1338 #ifdef CONFIG_MALI_ARBITER_SUPPORT
1339 struct kbase_arbiter_device arb;
1340 #endif
1341 /* Priority Control Manager device */
1342 struct priority_control_manager_device *pcm_dev;
1343
1344 struct notifier_block oom_notifier_block;
1345
1346 #if !MALI_USE_CSF
1347 spinlock_t quick_reset_lock;
1348 bool quick_reset_enabled;
1349 /*
1350 * After entering quick_reset_mode (i.e. quick_reset_enabled is true), this
1351 * counts atoms that have reached the KBASE_JD_ATOM_STATE_HW_COMPLETED state.
1352 *
1353 * Once num_of_atoms_hw_completed reaches a certain value, quick_reset_mode is exited.
1354 * See the references to num_of_atoms_hw_completed in kbase_js_complete_atom().
1355 */
1356 u32 num_of_atoms_hw_completed;
1357 #endif
1358
1359 struct kbase_mem_migrate mem_migrate;
1360
1361 #if MALI_USE_CSF && IS_ENABLED(CONFIG_SYNC_FILE)
1362 atomic_t live_fence_metadata;
1363 #endif
1364 u32 mmu_as_inactive_wait_time_ms;
1365 struct kmem_cache *va_region_slab;
1366 };
1367
1368 /**
1369 * enum kbase_file_state - Initialization state of a file opened by @kbase_open
1370 *
1371 * @KBASE_FILE_NEED_VSN: Initial state, awaiting API version.
1372 * @KBASE_FILE_VSN_IN_PROGRESS: Indicates if setting an API version is in
1373 * progress and other setup calls shall be
1374 * rejected.
1375 * @KBASE_FILE_NEED_CTX: Indicates if the API version handshake has
1376 * completed, awaiting context creation flags.
1377 * @KBASE_FILE_CTX_IN_PROGRESS: Indicates if the context's setup is in progress
1378 * and other setup calls shall be rejected.
1379 * @KBASE_FILE_COMPLETE: Indicates if the setup for context has
1380 * completed, i.e. flags have been set for the
1381 * context.
1382 *
1383 * The driver allows only limited interaction with user-space until setup
1384 * is complete.
1385 */
1386 enum kbase_file_state {
1387 KBASE_FILE_NEED_VSN,
1388 KBASE_FILE_VSN_IN_PROGRESS,
1389 KBASE_FILE_NEED_CTX,
1390 KBASE_FILE_CTX_IN_PROGRESS,
1391 KBASE_FILE_COMPLETE
1392 };
1393
1394 /**
1395 * struct kbase_file - Object representing a file opened by @kbase_open
1396 *
1397 * @kbdev: Object representing an instance of GPU platform device,
1398 * allocated from the probe method of the Mali driver.
1399 * @filp: Pointer to the struct file corresponding to device file
1400 * /dev/malixx instance, passed to the file's open method.
1401 * @kctx: Object representing an entity, among which GPU is
1402 * scheduled and which gets its own GPU address space.
1403 * Invalid until @setup_state is KBASE_FILE_COMPLETE.
1404 * @api_version: Contains the version number for User/kernel interface,
1405 * used for compatibility check. Invalid until
1406 * @setup_state is KBASE_FILE_NEED_CTX.
1407 * @setup_state: Initialization state of the file. Values come from
1408 * the kbase_file_state enumeration.
1409 */
1410 struct kbase_file {
1411 struct kbase_device *kbdev;
1412 struct file *filp;
1413 struct kbase_context *kctx;
1414 unsigned long api_version;
1415 atomic_t setup_state;
1416 };
1417 #if MALI_JIT_PRESSURE_LIMIT_BASE
1418 /**
1419 * enum kbase_context_flags - Flags for kbase contexts
1420 *
1421 * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
1422 * process on a 64-bit kernel.
1423 *
1424 * @KCTX_RUNNABLE_REF: Set when context is counted in
1425 * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
1426 *
1427 * @KCTX_ACTIVE: Set when the context is active.
1428 *
1429 * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
1430 * context.
1431 *
1432 * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
1433 * initialized.
1434 *
1435 * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
1436 * allocations. Existing allocations will not change.
1437 *
1438 * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
1439 *
1440 * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
1441 * scheduled in.
1442 *
1443 * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
1444 * This is only ever updated whilst the jsctx_mutex is held.
1445 *
1446 * @KCTX_DYING: Set when the context process is in the process of being evicted.
1447 *
1448 * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
1449 * allocations. For 64-bit clients it is enabled by default, and disabled by
1450 * default on 32-bit clients. Being able to clear this flag is only used for
1451 * testing purposes of the custom zone allocation on 64-bit user-space builds,
1452 * where we also require more control than is available through e.g. the JIT
1453 * allocation mechanism. However, the 64-bit user-space client must still
1454 * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
1455 *
1456 * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
1457 * from it for job slot 0. This is reset when the context first goes active or
1458 * is re-activated on that slot.
1459 *
1460 * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
1461 * from it for job slot 1. This is reset when the context first goes active or
1462 * is re-activated on that slot.
1463 *
1464 * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
1465 * from it for job slot 2. This is reset when the context first goes active or
1466 * is re-activated on that slot.
1467 *
1468 * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
1469 * the context due to an unhandled page (or bus) fault. It is cleared when
1470 * the refcount for the context drops to 0, or when the address spaces are
1471 * re-enabled on GPU reset or power cycle.
1472 *
1473 * @KCTX_JPL_ENABLED: Set when JIT physical page limit is less than JIT virtual
1474 * address page limit, so we must take care to not exceed the physical limit
1475 *
1476 * All members need to be separate bits. This enum is intended for use in a
1477 * bitmask where multiple values get OR-ed together.
1478 */
1479 enum kbase_context_flags {
1480 KCTX_COMPAT = 1U << 0,
1481 KCTX_RUNNABLE_REF = 1U << 1,
1482 KCTX_ACTIVE = 1U << 2,
1483 KCTX_PULLED = 1U << 3,
1484 KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
1485 KCTX_INFINITE_CACHE = 1U << 5,
1486 KCTX_SUBMIT_DISABLED = 1U << 6,
1487 KCTX_PRIVILEGED = 1U << 7,
1488 KCTX_SCHEDULED = 1U << 8,
1489 KCTX_DYING = 1U << 9,
1490 KCTX_FORCE_SAME_VA = 1U << 11,
1491 KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
1492 KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
1493 KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
1494 KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
1495 KCTX_JPL_ENABLED = 1U << 16,
1496 };
1497 #else
1498 /**
1499 * enum kbase_context_flags - Flags for kbase contexts
1500 *
1501 * @KCTX_COMPAT: Set when the context process is a compat process, 32-bit
1502 * process on a 64-bit kernel.
1503 *
1504 * @KCTX_RUNNABLE_REF: Set when context is counted in
1505 * kbdev->js_data.nr_contexts_runnable. Must hold queue_mutex when accessing.
1506 *
1507 * @KCTX_ACTIVE: Set when the context is active.
1508 *
1509 * @KCTX_PULLED: Set when last kick() caused atoms to be pulled from this
1510 * context.
1511 *
1512 * @KCTX_MEM_PROFILE_INITIALIZED: Set when the context's memory profile has been
1513 * initialized.
1514 *
1515 * @KCTX_INFINITE_CACHE: Set when infinite cache is to be enabled for new
1516 * allocations. Existing allocations will not change.
1517 *
1518 * @KCTX_SUBMIT_DISABLED: Set to prevent context from submitting any jobs.
1519 *
1520 * @KCTX_PRIVILEGED: Set if the context uses an address space and should be kept
1521 * scheduled in.
1522 *
1523 * @KCTX_SCHEDULED: Set when the context is scheduled on the Run Pool.
1524 * This is only ever updated whilst the jsctx_mutex is held.
1525 *
1526 * @KCTX_DYING: Set when the context process is in the process of being evicted.
1527 *
1529 * @KCTX_FORCE_SAME_VA: Set when BASE_MEM_SAME_VA should be forced on memory
1530 * allocations. For 64-bit clients it is enabled by default, and disabled by
1531 * default on 32-bit clients. Being able to clear this flag is only used for
1532 * testing purposes of the custom zone allocation on 64-bit user-space builds,
1533 * where we also require more control than is available through e.g. the JIT
1534 * allocation mechanism. However, the 64-bit user-space client must still
1535 * reserve a JIT region using KBASE_IOCTL_MEM_JIT_INIT
1536 *
1537 * @KCTX_PULLED_SINCE_ACTIVE_JS0: Set when the context has had an atom pulled
1538 * from it for job slot 0. This is reset when the context first goes active or
1539 * is re-activated on that slot.
1540 *
1541 * @KCTX_PULLED_SINCE_ACTIVE_JS1: Set when the context has had an atom pulled
1542 * from it for job slot 1. This is reset when the context first goes active or
1543 * is re-activated on that slot.
1544 *
1545 * @KCTX_PULLED_SINCE_ACTIVE_JS2: Set when the context has had an atom pulled
1546 * from it for job slot 2. This is reset when the context first goes active or
1547 * is re-activated on that slot.
1548 *
1549 * @KCTX_AS_DISABLED_ON_FAULT: Set when the GPU address space is disabled for
1550 * the context due to an unhandled page (or bus) fault. It is cleared when
1551 * the refcount for the context drops to 0, or when the address spaces are
1552 * re-enabled on GPU reset or power cycle.
1553 *
1554 * All members need to be separate bits. This enum is intended for use in a
1555 * bitmask where multiple values get OR-ed together.
1556 */
1557 enum kbase_context_flags {
1558 KCTX_COMPAT = 1U << 0,
1559 KCTX_RUNNABLE_REF = 1U << 1,
1560 KCTX_ACTIVE = 1U << 2,
1561 KCTX_PULLED = 1U << 3,
1562 KCTX_MEM_PROFILE_INITIALIZED = 1U << 4,
1563 KCTX_INFINITE_CACHE = 1U << 5,
1564 KCTX_SUBMIT_DISABLED = 1U << 6,
1565 KCTX_PRIVILEGED = 1U << 7,
1566 KCTX_SCHEDULED = 1U << 8,
1567 KCTX_DYING = 1U << 9,
1568 KCTX_FORCE_SAME_VA = 1U << 11,
1569 KCTX_PULLED_SINCE_ACTIVE_JS0 = 1U << 12,
1570 KCTX_PULLED_SINCE_ACTIVE_JS1 = 1U << 13,
1571 KCTX_PULLED_SINCE_ACTIVE_JS2 = 1U << 14,
1572 KCTX_AS_DISABLED_ON_FAULT = 1U << 15,
1573 };
1574 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
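
/*
 * Illustrative sketch (not part of the original header): the context flags
 * above are distinct bits so that several of them can be OR-ed into, tested
 * against and set on the atomic kbase_context::flags word. The helper names
 * below are hypothetical and only show the intended bitmask usage.
 */
#if 0 /* example only */
static bool example_ctx_flag(struct kbase_context *kctx,
			     enum kbase_context_flags flag)
{
	/* Test a single flag bit without any extra locking. */
	return (atomic_read(&kctx->flags) & flag) != 0;
}

static void example_ctx_flag_set(struct kbase_context *kctx,
				 enum kbase_context_flags flag)
{
	/* Set the bit without disturbing the other flags. */
	atomic_or(flag, &kctx->flags);
}
#endif
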
1575
1576 struct kbase_sub_alloc {
1577 struct list_head link;
1578 struct page *page;
1579 DECLARE_BITMAP(sub_pages, SZ_2M / SZ_4K);
1580 };
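
/*
 * Illustrative sketch (not part of the original header): the @sub_pages
 * bitmap above has SZ_2M / SZ_4K = 512 bits, one per 4 KB sub-page of the
 * 2 MB @page. A partial allocation could claim the next free sub-page
 * roughly as follows; the function name is an assumption for illustration.
 */
#if 0 /* example only */
static int example_claim_sub_page(struct kbase_sub_alloc *sa)
{
	unsigned int idx = find_first_zero_bit(sa->sub_pages, SZ_2M / SZ_4K);

	if (idx >= SZ_2M / SZ_4K)
		return -ENOMEM; /* every 4 KB sub-page is already in use */

	set_bit(idx, sa->sub_pages);
	return idx; /* index of the claimed sub-page within the 2 MB page */
}
#endif
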
1581
1582 /**
1583 * struct kbase_context - Kernel base context
1584 *
1585 * @filp: Pointer to the struct file corresponding to device file
1586 * /dev/malixx instance, passed to the file's open method.
1587 * @kbdev: Pointer to the Kbase device for which the context is created.
1588 * @kctx_list_link: Node into Kbase device list of contexts.
1589 * @mmu: Structure holding details of the MMU tables for this
1590 * context
1591 * @id: Unique identifier for the context, indicates the number of
1592 * contexts which have been created for the device so far.
1593 * @api_version: contains the version number for User/kernel interface,
1594 * used for compatibility check.
1595 * @event_list: list of posted events about completed atoms, to be sent to
1596 * event handling thread of Userspace.
1597 * @event_coalesce_list: list containing events corresponding to successive atoms
1598 * which have requested deferred delivery of the completion
1599 * events to Userspace.
1600 * @event_mutex: Lock to protect the concurrent access to @event_list &
1601 * @event_coalesce_list.
1602 * @event_closed: Flag set through POST_TERM ioctl, indicates that Driver
1603 * should stop posting events and also inform event handling
1604 * thread that context termination is in progress.
1605 * @event_workq: Workqueue for processing work items corresponding to atoms
1606 * that do not return an event to userspace.
1607 * @event_count: Count of the posted events to be consumed by Userspace.
1608 * @event_coalesce_count: Count of the events present in @event_coalesce_list.
1609 * @flags: bitmap of enums from kbase_context_flags, indicating the
1610 * state & attributes for the context.
1611 * @aliasing_sink_page: Special page used for KBASE_MEM_TYPE_ALIAS allocations,
1612 * which can alias a number of memory regions. The page is
1613 * used to back a region that is mapped with a write-alloc
1614 * cache setup, typically used when the write result of the
1615 * GPU isn't needed, but the GPU must write anyway.
1616 * @mem_partials_lock: Lock for protecting the operations done on the elements
1617 * added to @mem_partials list.
1618 * @mem_partials: List head for the list of large pages, 2MB in size, which
1619 * have been split into 4 KB pages and are used partially
1620 * for the allocations >= 2 MB in size.
1621 * @reg_lock: Lock used for GPU virtual address space management operations,
1622 * like adding/freeing a memory region in the address space.
1623 * It could potentially be converted to a rwlock.
1624 * @reg_rbtree_same: RB tree of the memory regions allocated from the SAME_VA
1625 * zone of the GPU virtual address space. Used for allocations
1626 * having the same value for GPU & CPU virtual address.
1627 * @reg_rbtree_custom: RB tree of the memory regions allocated from the CUSTOM_VA
1628 * zone of the GPU virtual address space.
1629 * @reg_rbtree_exec: RB tree of the memory regions allocated from the EXEC_VA
1630 * zone of the GPU virtual address space. Used for GPU-executable
1631 * allocations which don't need the SAME_VA property.
1632 * @reg_rbtree_exec_fixed: RB tree of the memory regions allocated from the
1633 * EXEC_FIXED_VA zone of the GPU virtual address space. Used for
1634 * GPU-executable allocations with FIXED/FIXABLE GPU virtual
1635 * addresses.
1636 * @reg_rbtree_fixed: RB tree of the memory regions allocated from the FIXED_VA zone
1637 * of the GPU virtual address space. Used for allocations with
1638 * FIXED/FIXABLE GPU virtual addresses.
1639 * @num_fixable_allocs: A count for the number of memory allocations with the
1640 * BASE_MEM_FIXABLE property.
1641 * @num_fixed_allocs: A count for the number of memory allocations with the
1642 * BASE_MEM_FIXED property.
1643 * @reg_zone: Zone information for the reg_rbtree_<...> members.
1644 * @cookies: Bitmask of BITS_PER_LONG bits, used mainly for
1645 * SAME_VA allocations to defer the reservation of memory region
1646 * (from the GPU virtual address space) from base_mem_alloc
1647 * ioctl to mmap system call. This helps returning unique
1648 * handles, disguised as GPU VA, to Userspace from base_mem_alloc
1649 * and later retrieving the pointer to memory region structure
1650 * in the mmap handler.
1651 * @pending_regions: Array containing pointers to memory region structures,
1652 * used in conjunction with @cookies bitmask mainly for
1653 * providing a mechanism to have the same value for CPU & GPU virtual
1654 * address (an illustrative sketch follows the structure definition below).
1655 * @event_queue: Wait queue used for blocking the thread, which consumes
1656 * the base_jd_event corresponding to an atom, when there
1657 * are no more posted events.
1658 * @tgid: Thread group ID of the process whose thread created
1659 * the context (by calling KBASE_IOCTL_VERSION_CHECK or
1660 * KBASE_IOCTL_SET_FLAGS, depending on the @api_version).
1661 * This is usually, but not necessarily, the same as the
1662 * process whose thread opened the device file
1663 * /dev/malixx instance.
1664 * @pid: ID of the thread, corresponding to process @tgid,
1665 * which actually created the context. This is usually,
1666 * but not necessarily, the same as the thread which
1667 * opened the device file /dev/malixx instance.
1668 * @csf: kbase csf context
1669 * @jctx: object encapsulating all the Job dispatcher related state,
1670 * including the array of atoms.
1671 * @used_pages: Keeps a track of the number of 4KB physical pages in use
1672 * for the context.
1673 * @nonmapped_pages: Updated in the same way as @used_pages, except that it
1674 * is reset to 0 when the special tracking page is freed by
1675 * userspace.
1676 * @permanent_mapped_pages: Usage count of permanently mapped memory
1677 * @mem_pools: Context-specific pools of free physical memory pages.
1678 * @reclaim: Shrinker object registered with the kernel containing
1679 * the pointer to callback function which is invoked under
1680 * low memory conditions. In the callback function Driver
1681 * frees up the memory for allocations marked as
1682 * evictable/reclaimable.
1683 * @evict_list: List head for the list containing the allocations which
1684 * can be evicted or freed up in the shrinker callback.
1685 * @evict_nents: Total number of pages allocated by the allocations within
1686 * @evict_list (atomic).
1687 * @waiting_soft_jobs: List head for the list containing softjob atoms, which
1688 * are either waiting for the event set operation, or waiting
1689 * for the signaling of an input fence, or waiting for the GPU
1690 * device to be powered on so as to dump the CPU/GPU timestamps.
1691 * @waiting_soft_jobs_lock: Lock to protect @waiting_soft_jobs list from concurrent
1692 * accesses.
1693 * @dma_fence: Object containing list head for the list of dma-buf fence
1694 * waiting atoms and the waitqueue to process the work item
1695 * queued for the atoms blocked on the signaling of dma-buf
1696 * fences.
1697 * @dma_fence.waiting_resource: list head for the list of dma-buf fence
1698 * @dma_fence.wq: waitqueue to process the work item queued
1699 * @as_nr: id of the address space being used for the scheduled in
1700 * context. This is effectively part of the Run Pool, because
1701 * it only has a valid setting (!=KBASEP_AS_NR_INVALID) whilst
1702 * the context is scheduled in. The hwaccess_lock must be held
1703 * whilst accessing this.
1704 * If the context relating to this value of as_nr is required,
1705 * then the context must be retained to ensure that it doesn't
1706 * disappear whilst it is being used. Alternatively, hwaccess_lock
1707 * can be held to ensure the context doesn't disappear (but this
1708 * has restrictions on what other locks can be taken simultaneously).
1709 * @refcount: Keeps track of the number of users of this context. A user
1710 * can be a job that is available for execution, instrumentation
1711 * needing to 'pin' a context for counter collection, etc.
1712 * If the refcount reaches 0 then this context is considered
1713 * inactive and the previously programmed AS might be cleared
1714 * at any point.
1715 * Generally the reference count is incremented when the context
1716 * is scheduled in and an atom is pulled from the context's
1717 * per-slot runnable tree (JM GPUs), or when a GPU command queue
1718 * group is programmed on a CSG slot (CSF GPUs).
1719 * @process_mm: Pointer to the memory descriptor of the process which
1720 * created the context. Used for accounting the physical
1721 * pages used for GPU allocations, done for the context,
1722 * to the memory consumed by the process. A reference is taken
1723 * on this descriptor for the Userspace created contexts so that
1724 * Kbase can safely access it to update the memory usage counters.
1725 * The reference is dropped on context termination.
1726 * @gpu_va_end: End address of the GPU va space (in 4KB page units)
1727 * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
1728 * tiler heaps of the kbase context.
1729 * @running_total_tiler_heap_memory: Running total of the tiler heap memory in the
1730 * kbase context.
1731 * @peak_total_tiler_heap_memory: Peak value of the total tiler heap memory in the
1732 * kbase context.
1733 * @jit_va: Indicates if a JIT_VA zone has been created.
1734 * @mem_profile_data: Buffer containing the profiling information provided by
1735 * Userspace, can be read through the mem_profile debugfs file.
1736 * @mem_profile_size: Size of the @mem_profile_data.
1737 * @mem_profile_lock: Lock to serialize the operations related to mem_profile
1738 * debugfs file.
1739 * @kctx_dentry: Pointer to the debugfs directory created for every context,
1740 * inside kbase_device::debugfs_ctx_directory, containing
1741 * context specific files.
1742 * @reg_dump: Buffer containing a register offset & value pair, used
1743 * for dumping job fault debug info.
1744 * @job_fault_count: Indicates that a job fault occurred for the context and
1745 * dumping of its debug info is in progress.
1746 * @job_fault_resume_event_list: List containing atoms completed after the faulty
1747 * atom but before the debug data for faulty atom was dumped.
1748 * @mem_view_column_width: Controls the number of bytes shown in every column of the
1749 * output of "mem_view" debugfs file.
1750 * @jsctx_queue: Per slot & priority arrays of object containing the root
1751 * of RB-tree holding currently runnable atoms on the job slot
1752 * and the head item of the linked list of atoms blocked on
1753 * cross-slot dependencies.
1754 * @slot_tracking: Tracking and control of this context's use of all job
1755 * slots
1756 * @atoms_pulled_all_slots: Total number of atoms currently pulled from the
1757 * context, across all slots.
1758 * @slots_pullable: Bitmask of slots, indicating the slots for which the
1759 * context has pullable atoms in the runnable tree.
1760 * @work: Work structure used for deferred ASID assignment.
1761 * @completed_jobs: List containing completed atoms for which base_jd_event is
1762 * to be posted.
1763 * @work_count: Number of work items, corresponding to atoms, currently
1764 * pending on job_done workqueue of @jctx.
1765 * @soft_job_timeout: Timer object used for failing/cancelling the waiting
1766 * soft-jobs which have been blocked for more than the
1767 * timeout value used for the soft-jobs
1768 * @jit_alloc: Array of 256 pointers to GPU memory regions, used for
1769 * just-in-time memory allocations.
1770 * @jit_max_allocations: Maximum allowed number of in-flight
1771 * just-in-time memory allocations.
1772 * @jit_current_allocations: Current number of in-flight just-in-time
1773 * memory allocations.
1774 * @jit_current_allocations_per_bin: Current number of in-flight just-in-time
1775 * memory allocations per bin.
1776 * @jit_group_id: A memory group ID to be passed to a platform-specific
1777 * memory group manager.
1778 * Valid range is 0..(MEMORY_GROUP_MANAGER_NR_GROUPS-1).
1779 * @jit_phys_pages_limit: Limit of physical pages to apply across all
1780 * just-in-time memory allocations, applied to
1781 * @jit_current_phys_pressure.
1782 * @jit_current_phys_pressure: Current 'pressure' on physical pages, which is
1783 * the sum of the worst case estimate of pages that
1784 * could be used (i.e. the
1785 * &struct_kbase_va_region.nr_pages for all in-use
1786 * just-in-time memory regions that have not yet had
1787 * a usage report) and the actual number of pages
1788 * that were used (i.e. the
1789 * &struct_kbase_va_region.used_pages for regions
1790 * that have had a usage report).
1791 * @jit_phys_pages_to_be_allocated: Count of the physical pages that are being
1792 * now allocated for just-in-time memory
1793 * allocations of a context (across all the
1794 * threads). This is supposed to be updated
1795 * with @reg_lock held before allocating
1796 * the backing pages. This helps ensure that
1797 * total physical memory usage for just in
1798 * time memory allocation remains within the
1799 * @jit_phys_pages_limit in multi-threaded
1800 * scenarios.
1801 * @jit_active_head: List containing the just-in-time memory allocations
1802 * which are in use.
1803 * @jit_pool_head: List containing the just-in-time memory allocations
1804 * which have been freed up by userspace and so not being
1805 * used by them.
1806 * Driver caches them to quickly fulfill requests for new
1807 * JIT allocations. They are released in case of memory
1808 * pressure as they are put on the @evict_list when they
1809 * are freed up by userspace.
1810 * @jit_destroy_head: List containing the just-in-time memory allocations
1811 * which were moved to it from @jit_pool_head, in the
1812 * shrinker callback, after freeing their backing
1813 * physical pages.
1814 * @jit_evict_lock: Lock used for operations done on just-in-time memory
1815 * allocations and also for accessing @evict_list.
1816 * @jit_work: Work item queued to defer the freeing of a memory
1817 * region when a just-in-time memory allocation is moved
1818 * to @jit_destroy_head.
1819 * @ext_res_meta_head: A list of sticky external resources which were requested to
1820 * be mapped on GPU side, through a softjob atom of type
1821 * EXT_RES_MAP or STICKY_RESOURCE_MAP ioctl.
1822 * @age_count: Counter incremented on every call to jd_submit_atom,
1823 * atom is assigned the snapshot of this counter, which
1824 * is used to determine the atom's age when it is added to
1825 * the runnable RB-tree.
1826 * @trim_level: Level of JIT allocation trimming to perform on free (0-100%)
1827 * @kprcs: Reference to @struct kbase_process that the current
1828 * kbase_context belongs to.
1829 * @kprcs_link: List link for the list of kbase context maintained
1830 * under kbase_process.
1831 * @gwt_enabled: Indicates if tracking of GPU writes is enabled, protected by
1832 * kbase_context.reg_lock.
1833 * @gwt_was_enabled: Simple sticky bit flag to know if GWT was ever enabled.
1834 * @gwt_current_list: A list of addresses for which GPU has generated write faults,
1835 * after the last snapshot of it was sent to userspace.
1836 * @gwt_snapshot_list: Snapshot of the @gwt_current_list for sending to user space.
1837 * @priority: Indicates the context priority. Used along with @atoms_count
1838 * for context scheduling, protected by hwaccess_lock.
1839 * @atoms_count: Number of GPU atoms currently in use, per priority
1840 * @create_flags: Flags used in context creation.
1841 * @kinstr_jm: Kernel job manager instrumentation context handle
1842 * @tl_kctx_list_node: List item into the device timeline's list of
1843 * contexts, for timeline summarization.
1844 * @limited_core_mask: The mask that is applied to the affinity in case of atoms
1845 * marked with BASE_JD_REQ_LIMITED_CORE_MASK.
1846 * @platform_data: Pointer to platform specific per-context data.
1847 * @task: Pointer to the task structure of the main thread of the process
1848 * that created the Kbase context. It would be set only for the
1849 * contexts created by the Userspace and not for the contexts
1850 * created internally by the Kbase.
1851 *
1852 * A kernel base context is an entity among which the GPU is scheduled.
1853 * Each context has its own GPU address space.
1854 * Up to one context can be created for each client that opens the device file
1855 * /dev/malixx. Context creation is deferred until a special ioctl() system call
1856 * is made on the device file.
1857 */
1858 struct kbase_context {
1859 struct file *filp;
1860 struct kbase_device *kbdev;
1861 struct list_head kctx_list_link;
1862 struct kbase_mmu_table mmu;
1863
1864 u32 id;
1865 unsigned long api_version;
1866 struct list_head event_list;
1867 struct list_head event_coalesce_list;
1868 struct mutex event_mutex;
1869 #if !MALI_USE_CSF
1870 atomic_t event_closed;
1871 #endif
1872 struct workqueue_struct *event_workq;
1873 atomic_t event_count;
1874 int event_coalesce_count;
1875
1876 atomic_t flags;
1877
1878 struct tagged_addr aliasing_sink_page;
1879
1880 spinlock_t mem_partials_lock;
1881 struct list_head mem_partials;
1882
1883 struct mutex reg_lock;
1884
1885 struct rb_root reg_rbtree_same;
1886 struct rb_root reg_rbtree_custom;
1887 struct rb_root reg_rbtree_exec;
1888 #if MALI_USE_CSF
1889 struct rb_root reg_rbtree_exec_fixed;
1890 struct rb_root reg_rbtree_fixed;
1891 atomic64_t num_fixable_allocs;
1892 atomic64_t num_fixed_allocs;
1893 #endif
1894 struct kbase_reg_zone reg_zone[KBASE_REG_ZONE_MAX];
1895
1896 #if MALI_USE_CSF
1897 struct kbase_csf_context csf;
1898 #else
1899 struct kbase_jd_context jctx;
1900 struct jsctx_queue jsctx_queue
1901 [KBASE_JS_ATOM_SCHED_PRIO_COUNT][BASE_JM_MAX_NR_SLOTS];
1902 struct kbase_jsctx_slot_tracking slot_tracking[BASE_JM_MAX_NR_SLOTS];
1903 atomic_t atoms_pulled_all_slots;
1904
1905 struct list_head completed_jobs;
1906 atomic_t work_count;
1907 struct timer_list soft_job_timeout;
1908
1909 int priority;
1910 s16 atoms_count[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
1911 u32 slots_pullable;
1912 u32 age_count;
1913 #endif /* MALI_USE_CSF */
1914
1915 DECLARE_BITMAP(cookies, BITS_PER_LONG);
1916 struct kbase_va_region *pending_regions[BITS_PER_LONG];
1917
1918 wait_queue_head_t event_queue;
1919 pid_t tgid;
1920 pid_t pid;
1921 atomic_t used_pages;
1922 atomic_t nonmapped_pages;
1923 atomic_t permanent_mapped_pages;
1924
1925 struct kbase_mem_pool_group mem_pools;
1926
1927 struct shrinker reclaim;
1928 struct list_head evict_list;
1929 atomic_t evict_nents;
1930
1931 struct list_head waiting_soft_jobs;
1932 spinlock_t waiting_soft_jobs_lock;
1933
1934 int as_nr;
1935
1936 atomic_t refcount;
1937
1938 struct mm_struct *process_mm;
1939 u64 gpu_va_end;
1940 #if MALI_USE_CSF
1941 u32 running_total_tiler_heap_nr_chunks;
1942 u64 running_total_tiler_heap_memory;
1943 u64 peak_total_tiler_heap_memory;
1944 #endif
1945 bool jit_va;
1946
1947 #if IS_ENABLED(CONFIG_DEBUG_FS)
1948 char *mem_profile_data;
1949 size_t mem_profile_size;
1950 struct mutex mem_profile_lock;
1951 struct dentry *kctx_dentry;
1952
1953 unsigned int *reg_dump;
1954 atomic_t job_fault_count;
1955 struct list_head job_fault_resume_event_list;
1956 unsigned int mem_view_column_width;
1957
1958 #endif /* CONFIG_DEBUG_FS */
1959 struct kbase_va_region *jit_alloc[1 + BASE_JIT_ALLOC_COUNT];
1960 u8 jit_max_allocations;
1961 u8 jit_current_allocations;
1962 u8 jit_current_allocations_per_bin[256];
1963 u8 jit_group_id;
1964 #if MALI_JIT_PRESSURE_LIMIT_BASE
1965 u64 jit_phys_pages_limit;
1966 u64 jit_current_phys_pressure;
1967 u64 jit_phys_pages_to_be_allocated;
1968 #endif /* MALI_JIT_PRESSURE_LIMIT_BASE */
1969 struct list_head jit_active_head;
1970 struct list_head jit_pool_head;
1971 struct list_head jit_destroy_head;
1972 struct mutex jit_evict_lock;
1973 struct work_struct jit_work;
1974
1975 struct list_head ext_res_meta_head;
1976
1977 u8 trim_level;
1978
1979 struct kbase_process *kprcs;
1980 struct list_head kprcs_link;
1981
1982 #ifdef CONFIG_MALI_CINSTR_GWT
1983 bool gwt_enabled;
1984 bool gwt_was_enabled;
1985 struct list_head gwt_current_list;
1986 struct list_head gwt_snapshot_list;
1987 #endif
1988
1989 base_context_create_flags create_flags;
1990
1991 #if !MALI_USE_CSF
1992 struct kbase_kinstr_jm *kinstr_jm;
1993 #endif
1994 struct list_head tl_kctx_list_node;
1995
1996 u64 limited_core_mask;
1997
1998 #if !MALI_USE_CSF
1999 void *platform_data;
2000 #endif
2001
2002 struct task_struct *task;
2003 };
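
/*
 * Illustrative sketch (not part of the original header): as described for
 * @cookies and @pending_regions above, the GPU VA reservation for SAME_VA
 * allocations is deferred from the allocation ioctl to the later mmap()
 * call. A free cookie is handed back to user space disguised as an mmap
 * offset, and the mmap handler uses it to look the region up again. The
 * helper below is hypothetical, including the assumption that a set bit in
 * @cookies means "free slot"; it only shows the shape of that exchange.
 */
#if 0 /* example only */
static long example_grab_cookie(struct kbase_context *kctx,
				struct kbase_va_region *reg)
{
	unsigned long cookie;

	cookie = find_first_bit(kctx->cookies, BITS_PER_LONG);
	if (cookie >= BITS_PER_LONG)
		return -ENOMEM; /* no free cookie: too many pending regions */

	clear_bit(cookie, kctx->cookies);
	kctx->pending_regions[cookie] = reg;

	/* User space later passes this value back as the mmap offset. */
	return (long)cookie;
}
#endif
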
2004
2005 #ifdef CONFIG_MALI_CINSTR_GWT
2006 /**
2007 * struct kbasep_gwt_list_element - Structure used to collect GPU
2008 * write faults.
2009 * @link: List head for adding write faults.
2010 * @region: Details of the region where we have the
2011 * faulting page address.
2012 * @page_addr: Page address where GPU write fault occurred.
2013 * @num_pages: The number of pages modified.
2014 *
2015 * Using this structure all GPU write faults are stored in a list.
2016 */
2017 struct kbasep_gwt_list_element {
2018 struct list_head link;
2019 struct kbase_va_region *region;
2020 u64 page_addr;
2021 u64 num_pages;
2022 };
2023
2024 #endif
2025
2026 /**
2027 * struct kbase_ctx_ext_res_meta - Structure which binds an external resource
2028 * to a @kbase_context.
2029 * @ext_res_node: List head for adding the metadata to a
2030 * @kbase_context.
2031 * @reg: External resource information, containing
2032 * the corresponding VA region
2033 * @ref: Reference count.
2034 *
2035 * External resources can be mapped into multiple contexts as well as the same
2036 * context multiple times.
2037 * As kbase_va_region is refcounted, we guarantee that it will be available
2038 * for the duration of the external resource, meaning it is sufficient to use
2039 * it to rederive any additional data, like the GPU address.
2040 * This metadata structure binds a single external resource to a single
2041 * context, ensuring that per context mapping is tracked separately so it can
2042 * be overridden when needed and abuses by the application (freeing the resource
2043 * multiple times) don't affect the refcount of the physical allocation.
2044 */
2045 struct kbase_ctx_ext_res_meta {
2046 struct list_head ext_res_node;
2047 struct kbase_va_region *reg;
2048 u32 ref;
2049 };
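
/*
 * Illustrative sketch (not part of the original header): because the same
 * external resource can be mapped by a context more than once, a lookup over
 * kbase_context::ext_res_meta_head is expected before any new mapping is
 * created, so that repeat mappings only bump @ref. The helper below is an
 * assumption for illustration, not the driver's actual implementation.
 */
#if 0 /* example only */
static struct kbase_ctx_ext_res_meta *
example_find_ext_res(struct kbase_context *kctx, struct kbase_va_region *reg)
{
	struct kbase_ctx_ext_res_meta *meta;

	list_for_each_entry(meta, &kctx->ext_res_meta_head, ext_res_node) {
		if (meta->reg == reg) {
			meta->ref++; /* already mapped: just take a reference */
			return meta;
		}
	}
	return NULL; /* caller must create and link a new metadata entry */
}
#endif
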
2050
2051 enum kbase_reg_access_type {
2052 REG_READ,
2053 REG_WRITE
2054 };
2055
2056 enum kbase_share_attr_bits {
2057 /* (1ULL << 8) bit is reserved */
2058 SHARE_BOTH_BITS = (2ULL << 8), /* inner and outer shareable coherency */
2059 SHARE_INNER_BITS = (3ULL << 8) /* inner shareable coherency */
2060 };
2061
2062 /**
2063 * kbase_device_is_cpu_coherent - Returns if the device is CPU coherent.
2064 * @kbdev: kbase device
2065 *
2066 * Return: true if the device accesses are coherent, false if not.
2067 */
2068 static inline bool kbase_device_is_cpu_coherent(struct kbase_device *kbdev)
2069 {
2070 if ((kbdev->system_coherency == COHERENCY_ACE_LITE) ||
2071 (kbdev->system_coherency == COHERENCY_ACE))
2072 return true;
2073
2074 return false;
2075 }
2076
2077 /**
2078 * kbase_get_lock_region_min_size_log2 - Returns the minimum size of the MMU lock
2079 * region, as a logarithm
2080 *
2081 * @gpu_props: GPU properties
2082 *
2083 * Return: the minimum size of the MMU lock region as dictated by the corresponding
2084 * arch spec.
2085 */
2086 static inline u64 kbase_get_lock_region_min_size_log2(struct kbase_gpu_props const *gpu_props)
2087 {
2088 if (GPU_ID2_MODEL_MATCH_VALUE(gpu_props->props.core_props.product_id) >=
2089 GPU_ID2_MODEL_MAKE(12, 0))
2090 return 12; /* 4 kB */
2091
2092 return 15; /* 32 kB */
2093 }
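
/*
 * Illustrative sketch (not part of the original header): the value returned
 * above is a log2, so a caller obtains the minimum lock region size in bytes
 * by shifting. gpu_props is assumed to be in scope here.
 */
#if 0 /* example only */
	u64 min_size = 1ULL << kbase_get_lock_region_min_size_log2(gpu_props);
	/* 1ULL << 15 == 32768 bytes (32 kB) for GPUs before model 12.0 */
#endif
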
2094
2095 /* Conversion helpers for setting up high resolution timers */
2096 #define HR_TIMER_DELAY_MSEC(x) (ns_to_ktime(((u64)(x))*1000000U))
2097 #define HR_TIMER_DELAY_NSEC(x) (ns_to_ktime(x))
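
/*
 * Illustrative sketch (not part of the original header): the helpers above
 * convert plain millisecond/nanosecond values into the ktime_t that the
 * hrtimer API expects, e.g. when arming a relative 500 ms timeout:
 */
#if 0 /* example only */
	hrtimer_start(&timer, HR_TIMER_DELAY_MSEC(500), HRTIMER_MODE_REL);
#endif
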
2098
2099 /* Maximum number of loops polling the GPU for a cache flush before we assume it must have completed */
2100 #define KBASE_CLEAN_CACHE_MAX_LOOPS 100000
2101 /* Maximum number of loops polling the GPU for an AS command to complete before we assume the GPU has hung */
2102 #define KBASE_AS_INACTIVE_MAX_LOOPS 100000000
2103 /* Maximum number of loops polling the GPU PRFCNT_ACTIVE bit before we assume the GPU has hung */
2104 #define KBASE_PRFCNT_ACTIVE_MAX_LOOPS 100000000
2105 #endif /* _KBASE_DEFS_H_ */
2106