/*
 *
 * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA  02110-1301, USA.
 *
 */


/**
 * @file mali_kbase_js_defs.h
 * Job Scheduler Type Definitions
 */

#ifndef _KBASE_JS_DEFS_H_
#define _KBASE_JS_DEFS_H_

/**
 * @addtogroup base_api
 * @{
 */

/**
 * @addtogroup base_kbase_api
 * @{
 */

/**
 * @addtogroup kbase_js
 * @{
 */

/* Forward declarations */
struct kbase_device;
struct kbase_jd_atom;

typedef u32 kbase_context_flags;

struct kbasep_atom_req {
	base_jd_core_req core_req;
	kbase_context_flags ctx_req;
	u32 device_nr;
};

/** Callback function run on all of a context's jobs registered with the Job
 * Scheduler */
typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);
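
/* A minimal sketch of a callback matching this typedef (illustrative only,
 * not part of this header). The Job Scheduler invokes the callback once per
 * job registered by the context; this example merely counts those jobs:
 *
 *   static atomic_t example_job_count = ATOMIC_INIT(0);
 *
 *   static void example_count_jobs_cb(struct kbase_device *kbdev,
 *                                     struct kbase_jd_atom *katom)
 *   {
 *       atomic_inc(&example_job_count);
 *   }
 */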

/**
 * @brief Maximum number of jobs that can be submitted to a job slot whilst
 * inside the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running; without a limit, an unbounded number of GPU NULL jobs could be
 * submitted from inside the IRQ handler, increasing IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
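
/* Illustrative sketch of how such a per-IRQ cap can be enforced (an
 * assumption about usage, not a copy of the driver's IRQ handler;
 * job_ready_for_slot() and submit_job_to_slot() are hypothetical helpers).
 * Submissions for a slot are counted within one handler invocation, so
 * completing NULL jobs cannot keep the handler busy indefinitely:
 *
 *   int submitted = 0;
 *
 *   while (job_ready_for_slot(js) &&
 *          submitted < KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ) {
 *       submit_job_to_slot(js);
 *       submitted++;
 *   }
 */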

/**
 * @brief Context attributes
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 * the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 * context have attribute X?"
 *
 * The boolean value of a context attribute often affects scheduling
 * decisions, such as which affinities and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 * attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 */
enum kbasep_js_ctx_attr {
	/** Attribute indicating a context that contains Compute jobs. That is,
	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE,

	/** Attribute indicating a context that contains Non-Compute jobs. That is,
	 * the context has some jobs that are \b not of type @ref
	 * BASE_JD_REQ_ONLY_COMPUTE.
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,

	/** Attribute indicating that a context contains compute-job atoms that
	 * aren't restricted to a coherent group, and can run on all cores.
	 *
	 * Specifically, this is when the atom's \a core_req satisfies:
	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
	 *
	 * Such atoms could be blocked from running if one of the coherent groups
	 * is being used by another job slot, so tracking this context attribute
	 * allows us to prevent such situations.
	 *
	 * @note This doesn't take into account the 1-coregroup case, where all
	 * compute atoms would effectively be able to run on 'all cores', but
	 * contexts will still not always get marked with this attribute. Instead,
	 * it is the caller's responsibility to take into account the number of
	 * coregroups when interpreting this attribute.
	 *
	 * @note Whilst Tiler atoms are normally combined with
	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
	 * enough to handle anyway.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,

	/** Must be the last in the enum */
	KBASEP_JS_CTX_ATTR_COUNT
};
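
/* A sketch of the refcounting scheme described above (illustrative only; the
 * driver keeps its own retain/release helpers, see mali_kbase_js_ctx_attr.h).
 * A context 'retains' an attribute when its first atom with that attribute
 * arrives; a 0 -> 1 transition is what propagates the attribute up to the
 * runpool's refcount for scheduled contexts:
 *
 *   static void example_ctx_attr_retain(u32 *ctx_refcounts,
 *                                       enum kbasep_js_ctx_attr attr)
 *   {
 *       if (ctx_refcounts[attr]++ == 0) {
 *           // First atom with this attribute: the context now 'has' it,
 *           // so the runpool refcount for the attribute must be retained
 *           // too (if the context is currently scheduled).
 *       }
 *   }
 */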

enum {
	/** Bit indicating that a new atom should be started because this atom completed */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};

/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
typedef u32 kbasep_js_atom_done_code;
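
/* For example, an atom that was evicted from JS_NEXT and whose completion
 * should trigger new submissions would report:
 *
 *   kbasep_js_atom_done_code done_code =
 *       KBASE_JS_ATOM_DONE_START_NEW_ATOMS |
 *       KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT;
 */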

/**
 * @brief KBase Device Data Job Scheduler sub-structure
 *
 * This encapsulates the current context of the Job Scheduler on a particular
 * device. This context is global to the device, and is not tied to any
 * particular struct kbase_context running on the device.
 *
 * nr_contexts_running and as_free are optimized for packing together (by making
 * them smaller types than u32). The operations on them should rarely involve
 * masking. The use of signed types for arithmetic indicates to the compiler that
 * the value will not roll over (which would be undefined behavior), and so the
 * compiler is free to make optimizations based on that (i.e. to remove masking).
 */
struct kbasep_js_device_data {
	/* Sub-structure to collect together Job Scheduling data used in IRQ
	 * context. The hwaccess_lock must be held when accessing. */
	struct runpool_irq {
		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
		 * Bit 'N' being set indicates that the context bound to address space
		 * 'N' is allowed to submit jobs.
		 */
		u16 submit_allowed;
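
		/* Illustrative use of the bitvector (a sketch; the driver
		 * provides its own inline helpers for this). Testing, setting
		 * and clearing the bit for the context bound to address
		 * space 'n':
		 *
		 *   bool can_submit = (submit_allowed >> n) & 1u;
		 *   submit_allowed |= (u16)(1u << n);   // allow AS 'n'
		 *   submit_allowed &= (u16)~(1u << n);  // disallow AS 'n'
		 */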

		/** Context Attributes:
		 * Each is large enough to hold a refcount of the number of contexts
		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
		 *
		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
		 * the refcount. Hence, it's not worthwhile reducing this to
		 * bit-manipulation on u32s to save space (where in contrast, 4-bit
		 * sub-fields would be easy to do and would save space).
		 *
		 * Whilst this must not become negative, the sign bit is used for:
		 * - error detection in debug builds
		 * - Optimization: it is undefined for a signed int to overflow, and so
		 * the compiler can optimize for that never happening (thus, no masking
		 * is required on updating the variable) */
		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];

		/*
		 * Affinity management and tracking
		 */
		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
		 * that slot 'n' is using core 'i' (i.e. slot_affinity_refcount[n][i] > 0) */
		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
		/** Refcount for each core owned by each slot. Used to generate the
		 * slot_affinities array of bitvectors.
		 *
		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
		 * because it is refcounted only when a job is definitely about to be
		 * submitted to a slot, and is de-refcounted immediately after a job
		 * finishes */
		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
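
		/* Sketch of how the two affinity members relate (illustrative
		 * only): retaining core 'core' for slot 'js' bumps the
		 * per-core refcount and, on a 0 -> 1 transition, sets the
		 * corresponding bit in the summary bitvector; releasing does
		 * the reverse:
		 *
		 *   if (slot_affinity_refcount[js][core]++ == 0)
		 *       slot_affinities[js] |= 1ull << core;
		 *
		 *   if (--slot_affinity_refcount[js][core] == 0)
		 *       slot_affinities[js] &= ~(1ull << core);
		 */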
	} runpool_irq;

	/**
	 * Run Pool mutex, for managing contexts within the runpool.
	 * Unless otherwise specified, you must hold this lock whilst accessing any
	 * members that follow.
	 *
	 * In addition, this is used to access:
	 * - the kbasep_js_kctx_info::runpool substructure
	 */
	struct mutex runpool_mutex;

	/**
	 * Queue Lock, used to access the Policy's queue of contexts independently
	 * of the Run Pool.
	 *
	 * The Run Pool mutex is not required to access this.
	 */
	struct mutex queue_mutex;

	/**
	 * Scheduling semaphore. This must be held when calling
	 * kbase_jm_kick().
	 */
	struct semaphore schedule_sem;

	/**
	 * List of contexts that can currently be pulled from.
	 */
	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
	/**
	 * List of contexts that cannot currently be pulled from, but have
	 * jobs currently running.
	 */
	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];

	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
	s8 nr_user_contexts_running;
	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
	s8 nr_all_contexts_running;

	/** Core Requirements to match up with base_jd_atom's core_req member.
	 * @note This is a write-once member, and so no locking is required to read */
	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];

	u32 scheduling_period_ns;    /**< Value for JS_SCHEDULING_PERIOD_NS */
	u32 soft_stop_ticks;	     /**< Value for JS_SOFT_STOP_TICKS */
	u32 soft_stop_ticks_cl;	     /**< Value for JS_SOFT_STOP_TICKS_CL */
	u32 hard_stop_ticks_ss;	     /**< Value for JS_HARD_STOP_TICKS_SS */
	u32 hard_stop_ticks_cl;	     /**< Value for JS_HARD_STOP_TICKS_CL */
	u32 hard_stop_ticks_dumping; /**< Value for JS_HARD_STOP_TICKS_DUMPING */
	u32 gpu_reset_ticks_ss;	     /**< Value for JS_RESET_TICKS_SS */
	u32 gpu_reset_ticks_cl;	     /**< Value for JS_RESET_TICKS_CL */
	u32 gpu_reset_ticks_dumping; /**< Value for JS_RESET_TICKS_DUMPING */
	u32 ctx_timeslice_ns;	     /**< Value for JS_CTX_TIMESLICE_NS */

	/** Value for JS_SOFT_JOB_TIMEOUT */
	atomic_t soft_job_timeout_ms;

	/** List of suspended soft jobs */
	struct list_head suspended_soft_jobs_list;

#ifdef CONFIG_MALI_DEBUG
	/* Support soft-stop on a single context */
	bool softstop_always;
#endif				/* CONFIG_MALI_DEBUG */

	/** The initialized-flag is placed at the end, to avoid cache pollution (we should
	 * only be using this during init/term paths).
	 * @note This is a write-once member, and so no locking is required to read */
	int init_status;

	/* Number of contexts that can currently be pulled from */
	u32 nr_contexts_pullable;

	/* Number of contexts that can either be pulled from or are currently
	 * running */
	atomic_t nr_contexts_runnable;
};

/**
 * @brief KBase Context Job Scheduling information structure
 *
 * This is a substructure in the struct kbase_context that encapsulates all the
 * scheduling information.
 */
struct kbasep_js_kctx_info {

	/**
	 * Job Scheduler Context information sub-structure. These members are
	 * accessed regardless of whether the context is:
	 * - In the Policy's Run Pool
	 * - In the Policy's Queue
	 * - Neither queued nor in the Run Pool.
	 *
	 * You must obtain the jsctx_mutex before accessing any other members of
	 * this substructure.
	 *
	 * You may not access any of these members from IRQ context.
	 */
	struct kbase_jsctx {
		struct mutex jsctx_mutex;		    /**< Job Scheduler Context lock */

		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
		 * the dispatcher, and dependency-only jobs. See kbase_jd_context::job_nr
		 * for such jobs */
		u32 nr_jobs;

		/** Context Attributes:
		 * Each is large enough to hold a refcount of the number of atoms on
		 * the context. */
		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];

		/**
		 * Wait queue to wait for KCTX_SCHEDULED flag state changes.
		 */
		wait_queue_head_t is_scheduled_wait;

		/** Link implementing JS queues. Context can be present on one
		 * list per job slot.
		 */
		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
	} ctx;

	/* The initialized-flag is placed at the end, to avoid cache pollution (we should
	 * only be using this during init/term paths) */
	int init_status;
};

/** Subset of atom state that can be available after jd_done_nolock() is called
 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 * because the original atom could disappear. */
struct kbasep_js_atom_retained_state {
	/** Event code - to determine whether the atom has finished */
	enum base_jd_event_code event_code;
	/** Core requirements */
	base_jd_core_req core_req;
	/** Scheduling priority */
	int sched_priority;
	/** Job Slot to retry submitting to if submission from IRQ handler failed */
	int retry_submit_on_slot;
	/** Core group the atom was executed on */
	u32 device_nr;
};
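
/* Typical usage (a sketch based on the description above; the argument order
 * of the copy helper is an assumption): take the copy before the atom can be
 * freed, then work only on the copy:
 *
 *   struct kbasep_js_atom_retained_state retained;
 *
 *   kbasep_js_atom_retained_state_copy(&retained, katom);
 *   ... jd_done_nolock() is then called on katom, which may free it ...
 *   ... 'retained' (event_code, core_req, etc.) remains safe to read ...
 */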

/**
 * Value signifying 'no retry on a slot required' for:
 * - kbasep_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/**
 * base_jd_core_req value signifying 'invalid' for a kbasep_js_atom_retained_state.
 *
 * @see kbasep_js_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/**
 * @brief The JS timer resolution, in microseconds
 *
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1

/*
 * Internal atom priority defines for kbase_jd_atom::sched_prio
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};

/* Invalid priority for kbase_jd_atom::sched_prio */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID (-1)

/* Default priority in the case of contexts with no atoms, or when being lenient
 * about invalid priorities from userspace */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED
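
/* A sketch of the lenient handling mentioned above (illustrative; the helper
 * name is hypothetical): an out-of-range priority from userspace falls back
 * to the default rather than being rejected:
 *
 *   static int example_sanitize_prio(int sched_prio)
 *   {
 *       if (sched_prio < KBASE_JS_ATOM_SCHED_PRIO_HIGH ||
 *           sched_prio >= KBASE_JS_ATOM_SCHED_PRIO_COUNT)
 *           return KBASE_JS_ATOM_SCHED_PRIO_DEFAULT;
 *       return sched_prio;
 *   }
 */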

	  /** @} *//* end group kbase_js */
	  /** @} *//* end group base_kbase_api */
	  /** @} *//* end group base_api */

#endif				/* _KBASE_JS_DEFS_H_ */