xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/jm/mali_kbase_js_defs.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /*
3  *
4  * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 /**
23  * DOC: Job Scheduler Type Definitions
24  */
25 
26 #ifndef _KBASE_JS_DEFS_H_
27 #define _KBASE_JS_DEFS_H_
28 
29 /* Forward decls */
30 struct kbase_device;
31 struct kbase_jd_atom;
32 
33 
/*
 * typedef kbase_context_flags - Bitmask of per-context scheduling flags.
 *
 * The individual flag values are defined elsewhere and are not visible in
 * this header - confirm against the context flag definitions before use.
 */
typedef u32 kbase_context_flags;
35 
/**
 * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's
 *                                jobs registered with the Job Scheduler
 * @kbdev: The kbase device the job belongs to
 * @katom: The atom the callback is being invoked on
 */
typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
				  struct kbase_jd_atom *katom);
42 
/*
 * Maximum number of jobs that can be submitted to a job slot whilst inside
 * the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
 * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
52 
/**
 * enum kbasep_js_ctx_attr - Context attributes
 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
 *                              Compute jobs.
 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
 *                                  Non-Compute jobs.
 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
 *                                        contains compute-job atoms that aren't
 *                                        restricted to a coherent group,
 *                                        and can run on all cores.
 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool, or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 * the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 * context have attribute X?"
 *
 * The boolean value of the context attributes often affects scheduling
 * decisions, such as affinities to use and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 * attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE:
 * Attribute indicating a context that contains Compute jobs. That is,
 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_NON_COMPUTE:
 * Attribute indicating a context that contains Non-Compute jobs. That is,
 * the context has some jobs that are \b not of type @ref
 * BASE_JD_REQ_ONLY_COMPUTE.
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES:
 * Attribute indicating that a context contains compute-job atoms that
 * aren't restricted to a coherent group, and can run on all cores.
 *
 * Specifically, this is when the atom's \a core_req satisfy:
 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T) // uses slot 1 or slot 2
 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
 *
 * Such atoms could be blocked from running if one of the coherent groups
 * is being used by another job slot, so tracking this context attribute
 * allows us to prevent such situations.
 *
 * @note This doesn't take into account the 1-coregroup case, where all
 * compute atoms would effectively be able to run on 'all cores', but
 * contexts will still not always get marked with this attribute. Instead,
 * it is the caller's responsibility to take into account the number of
 * coregroups when interpreting this attribute.
 *
 * @note Whilst Tiler atoms are normally combined with
 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
 * enough to handle anyway.
 *
 */
enum kbasep_js_ctx_attr {
	KBASEP_JS_CTX_ATTR_COMPUTE,
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
	KBASEP_JS_CTX_ATTR_COUNT
};
126 
/* Bit-flags combined into a kbasep_js_atom_done_code (see typedef below),
 * reported when an atom completes.
 */
enum {
	/*
	 * Bit indicating that a new atom should be started because this atom
	 * completed
	 */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/*
	 * Bit indicating that the atom was evicted from the JS_NEXT registers
	 */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};

/**
 * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...>
 * bits (see the enum above)
 */
typedef u32 kbasep_js_atom_done_code;
144 
/*
 * Context scheduling mode values for kbase_device::js_ctx_scheduling_mode.
 */
enum {
	/*
	 * In this mode, higher priority atoms will be scheduled first,
	 * regardless of the context they belong to. Newly-runnable higher
	 * priority atoms can preempt lower priority atoms currently running on
	 * the GPU, even if they belong to a different context.
	 */
	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,

	/*
	 * In this mode, the highest-priority atom will be chosen from each
	 * context in turn using a round-robin algorithm, so priority only has
	 * an effect within the context an atom belongs to. Newly-runnable
	 * higher priority atoms can preempt the lower priority atoms currently
	 * running on the GPU, but only if they belong to the same context.
	 */
	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,

	/* Must be the last in the enum */
	KBASE_JS_PRIORITY_MODE_COUNT,
};
169 
/*
 * Internal atom priority defines for kbase_jd_atom::sched_prio
 *
 * Lower numeric values are higher priorities: KBASE_JS_ATOM_SCHED_PRIO_FIRST
 * (== KBASE_JS_ATOM_SCHED_PRIO_REALTIME) is the highest priority level (this
 * ordering is relied upon by kbase_js_prio_bitmap_t, where bit 0 is the
 * highest priority).
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0,
	KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST,
	KBASE_JS_ATOM_SCHED_PRIO_HIGH,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};
181 
/* Invalid priority for kbase_jd_atom::sched_prio.
 * Parenthesized so the negative value expands safely in any expression
 * context (consistent with KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID below).
 */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID (-1)

/* Default priority in the case of contexts with no atoms, or being lenient
 * about invalid priorities from userspace.
 */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
189 
/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits
 * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels.
 *
 * The type must be strictly wider than the number of bits needed to represent
 * a bitmap of priorities, so that we can do calculations such as:
 *   (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1
 * ...without causing undefined behavior due to a shift beyond the width of the
 * type.
 *
 * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth
 * moving to DECLARE_BITMAP()
 */
typedef u8 kbase_js_prio_bitmap_t;
203 
/* Ordering modification flags for kbase_js_atom_runs_before() */
typedef u32 kbase_atom_ordering_flag_t;

/* Atoms of the same context and priority should have their ordering decided by
 * their seq_nr instead of their age.
 *
 * seq_nr is used as a more slowly changing variant of age - it increases once
 * per group of related atoms, as determined by user-space. Hence, it can be
 * used to limit re-ordering decisions (such as pre-emption) to only re-order
 * between such groups, rather than re-order within those groups of atoms.
 */
#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0)
216 
/**
 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
 *               IRQ context. The hwaccess_lock must be held when accessing.
 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
 *                              scheduled context is allowed to submit jobs.
 *                              When bit 'N' is set in this, it indicates whether
 *                              the context bound to address space 'N' is
 *                              allowed to submit jobs.
 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
 *     Each is large enough to hold a refcount of the number of contexts
 *     that can fit into the runpool. This is currently BASE_MAX_NR_AS.
 *     Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
 *     the refcount. Hence, it's not worthwhile reducing this to
 *     bit-manipulation on u32s to save space (where in contrast, 4 bit
 *     sub-fields would be easy to do and would save space).
 *     Whilst this must not become negative, the sign bit is used for:
 *       - error detection in debug builds
 *       - Optimization: it is undefined for a signed int to overflow, and so
 *         the compiler can optimize for that never happening (thus, no masking
 *         is required on updating the variable)
 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
 *                               to aid affinity checking.
 *                               Element 'n' bit 'i' indicates that slot 'n'
 *                               is using core i (i.e. slot_affinity_refcount[n][i] > 0)
 * @runpool_irq.slot_affinity_refcount: Array of refcounts for each core owned
 *     by each slot. Used to generate the slot_affinities array of bitvectors.
 *     The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
 *     because it is refcounted only when a job is definitely about to be
 *     submitted to a slot, and is de-refcounted immediately after a job
 *     finishes
 * @schedule_sem: Scheduling semaphore. This must be held when calling
 *                kbase_jm_kick()
 * @ctx_list_pullable: List of contexts that can currently be pulled from
 * @ctx_list_unpullable: List of contexts that can not currently be pulled
 *                       from, but have jobs currently running.
 * @nr_user_contexts_running: Number of currently scheduled user contexts
 *                            (excluding ones that are not submitting jobs)
 * @nr_all_contexts_running: Number of currently scheduled contexts (including
 *                           ones that are not submitting jobs)
 * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
 *           @note This is a write-once member, and so no locking is required to
 *           read
 * @scheduling_period_ns:	Value for JS_SCHEDULING_PERIOD_NS
 * @soft_stop_ticks:		Value for JS_SOFT_STOP_TICKS
 * @soft_stop_ticks_cl:		Value for JS_SOFT_STOP_TICKS_CL
 * @hard_stop_ticks_ss:		Value for JS_HARD_STOP_TICKS_SS
 * @hard_stop_ticks_cl:		Value for JS_HARD_STOP_TICKS_CL
 * @hard_stop_ticks_dumping:	Value for JS_HARD_STOP_TICKS_DUMPING
 * @gpu_reset_ticks_ss:		Value for JS_RESET_TICKS_SS
 * @gpu_reset_ticks_cl:		Value for JS_RESET_TICKS_CL
 * @gpu_reset_ticks_dumping:	Value for JS_RESET_TICKS_DUMPING
 * @ctx_timeslice_ns:		Value for JS_CTX_TIMESLICE_NS
 * @suspended_soft_jobs_list:	List of suspended soft jobs
 * @softstop_always:		Support soft-stop on a single context
 * @init_status:The initialized-flag is placed at the end, to avoid
 *              cache-pollution (we should only be using this during init/term paths).
 *              @note This is a write-once member, and so no locking is required to
 *              read
 * @nr_contexts_pullable:Number of contexts that can currently be pulled from
 * @nr_contexts_runnable:Number of contexts that can either be pulled from or
 *                       are currently running
 * @soft_job_timeout_ms:Value for JS_SOFT_JOB_TIMEOUT
 * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
 *               independently of the Run Pool.
 *               Of course, you don't need the Run Pool lock to access this.
 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
 *
 * This encapsulates the current context of the Job Scheduler on a particular
 * device. This context is global to the device, and is not tied to any
 * particular struct kbase_context running on the device.
 *
 * nr_contexts_running and as_free are optimized for packing together (by making
 * them smaller types than u32). The operations on them should rarely involve
 * masking. The use of signed types for arithmetic indicates to the compiler
 * that the value will not rollover (which would be undefined behavior), and so
 * under the Total License model, it is free to make optimizations based on
 * that (i.e. to remove masking).
 */
struct kbasep_js_device_data {
	struct runpool_irq {
		u16 submit_allowed;
		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
	} runpool_irq;
	struct semaphore schedule_sem;
	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]
					  [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]
					    [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	s8 nr_user_contexts_running;
	s8 nr_all_contexts_running;
	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];

	u32 scheduling_period_ns;
	u32 soft_stop_ticks;
	u32 soft_stop_ticks_cl;
	u32 hard_stop_ticks_ss;
	u32 hard_stop_ticks_cl;
	u32 hard_stop_ticks_dumping;
	u32 gpu_reset_ticks_ss;
	u32 gpu_reset_ticks_cl;
	u32 gpu_reset_ticks_dumping;
	u32 ctx_timeslice_ns;

	struct list_head suspended_soft_jobs_list;

#ifdef CONFIG_MALI_BIFROST_DEBUG
	bool softstop_always;
#endif				/* CONFIG_MALI_BIFROST_DEBUG */
	int init_status;
	u32 nr_contexts_pullable;
	atomic_t nr_contexts_runnable;
	atomic_t soft_job_timeout_ms;
	u32 js_free_wait_time_ms;

	struct mutex queue_mutex;
	/*
	 * Run Pool mutex, for managing contexts within the runpool.
	 * Unless otherwise specified, you must hold this lock whilst accessing
	 * any members that follow
	 *
	 * In addition, this is used to access:
	 * * the kbasep_js_kctx_info::runpool substructure
	 */
	struct mutex runpool_mutex;
};
346 
/**
 * struct kbasep_js_kctx_info - KBase Context Job Scheduling information
 *	structure
 * @ctx: Job Scheduler Context information sub-structure. Its members are
 *	accessed regardless of whether the context is:
 *	- In the Policy's Run Pool
 *	- In the Policy's Queue
 *	- Not queued nor in the Run Pool.
 *	You must obtain the @ctx.jsctx_mutex before accessing any other members
 *	of this substructure.
 *	You may not access any of its members from IRQ context.
 * @ctx.jsctx_mutex: Job Scheduler Context lock
 * @ctx.nr_jobs: Number of jobs <b>ready to run</b> - does \em not include
 *	the jobs waiting in the dispatcher, and dependency-only
 *	jobs. See kbase_jd_context::job_nr for such jobs
 * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough
 *	to hold a refcount of the number of atoms on the context.
 * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SCHEDULED flag state
 *	changes.
 * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on
 *	one list per job slot.
 * @init_status: The initialized-flag is placed at the end, to avoid
 *	cache-pollution (we should only be using this during init/term paths)
 *
 * This is a substructure in the struct kbase_context that encapsulates all the
 * scheduling information.
 */
struct kbasep_js_kctx_info {
	struct kbase_jsctx {
		struct mutex jsctx_mutex;

		u32 nr_jobs;
		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		wait_queue_head_t is_scheduled_wait;
		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
	} ctx;
	int init_status;
};
385 
/**
 * struct kbasep_js_atom_retained_state - Subset of atom state.
 * @event_code: Event code - used to determine whether the atom has finished
 * @core_req: Core requirements
 * @sched_priority: Priority (one of the KBASE_JS_ATOM_SCHED_PRIO_<...> values)
 * @device_nr: Core group the atom was executed on
 *
 * Subset of atom state that can be available after kbase_jd_done_nolock() is called
 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 * because the original atom could disappear.
 */
struct kbasep_js_atom_retained_state {
	/* Event code - to determine whether the atom has finished */
	enum base_jd_event_code event_code;
	/* core requirements */
	base_jd_core_req core_req;
	/* priority */
	int sched_priority;
	/* Core group atom was executed on */
	u32 device_nr;

};
408 
/*
 * Value signifying 'no retry on a slot required' for:
 * - kbase_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 *
 * NOTE(review): neither member is visible in this header's
 * struct kbasep_js_atom_retained_state - confirm these fields still exist in
 * this driver version.
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/*
 * base_jd_core_req value signifying 'invalid' for a
 * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/*
 * The JS timer resolution, in microseconds.
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1
427 
/**
 * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's
 *                                    use of a job slot
 * @blocked: bitmap of priorities that this slot is blocked at (bit 0 is the
 *           highest priority, per kbase_js_prio_bitmap_t)
 * @atoms_pulled: counts of atoms that have been pulled from this slot,
 *                across all priority levels
 * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per
 *                    priority level
 *
 * Controls how a slot from the &struct kbase_context's jsctx_queue is managed,
 * for example to ensure correct ordering of atoms when atoms of different
 * priorities are unpulled.
 */
struct kbase_jsctx_slot_tracking {
	kbase_js_prio_bitmap_t blocked;
	atomic_t atoms_pulled;
	int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
};
446 
447 #endif /* _KBASE_JS_DEFS_H_ */
448