/*
 *
 * (C) COPYRIGHT 2011-2017 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU licence.
 *
 * A copy of the licence is included with the program, and can also be obtained
 * from Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 *
 */


/**
 * @file mali_kbase_js.h
 * Job Scheduler Type Definitions
 */

#ifndef _KBASE_JS_DEFS_H_
#define _KBASE_JS_DEFS_H_

/**
 * @addtogroup base_api
 * @{
 */

/**
 * @addtogroup base_kbase_api
 * @{
 */

/**
 * @addtogroup kbase_js
 * @{
 */
/* Forward decls */
struct kbase_device;
struct kbase_jd_atom;


typedef u32 kbase_context_flags;

struct kbasep_atom_req {
	base_jd_core_req core_req;
	kbase_context_flags ctx_req;
	u32 device_nr;
};

/** Callback function run on all of a context's jobs registered with the Job
 * Scheduler */
typedef void (*kbasep_js_ctx_job_cb)(struct kbase_device *kbdev, struct kbase_jd_atom *katom);

/**
 * @brief Maximum number of jobs that can be submitted to a job slot whilst
 * inside the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running. Without a limit, an unbounded number of GPU NULL jobs could be
 * submitted inside the IRQ handler, which would increase IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2
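/*
 * Illustrative sketch (not part of the driver): one way an IRQ-time completion
 * path could use KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ to bound how many
 * jobs it resubmits to a slot before leaving the handler. The helpers
 * example_slot_has_next_job() and example_submit_next_job() are hypothetical
 * stand-ins for whatever the hardware-access backend actually provides.
 *
 *	static void example_irq_refill_slot(struct kbase_device *kbdev, int js)
 *	{
 *		int submitted = 0;
 *
 *		// Refill the slot, but never beyond the per-IRQ cap, so a stream
 *		// of quickly-completing NULL jobs cannot keep us in the handler.
 *		while (submitted < KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ &&
 *		       example_slot_has_next_job(kbdev, js)) {
 *			example_submit_next_job(kbdev, js);
 *			submitted++;
 *		}
 *	}
 */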
/**
 * @brief Context attributes
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool, or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 *   the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 *   context have attribute X?"
 *
 * The boolean value of the context attributes often affects scheduling
 * decisions, such as affinities to use and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 *   attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 */
enum kbasep_js_ctx_attr {
	/** Attribute indicating a context that contains Compute jobs. That is,
	 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE,

	/** Attribute indicating a context that contains Non-Compute jobs. That is,
	 * the context has some jobs that are \b not of type @ref
	 * BASE_JD_REQ_ONLY_COMPUTE.
	 *
	 * @note A context can be both 'Compute' and 'Non Compute' if it contains
	 * both types of jobs.
	 */
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,

	/** Attribute indicating that a context contains compute-job atoms that
	 * aren't restricted to a coherent group, and can run on all cores.
	 *
	 * Specifically, this is when the atom's \a core_req satisfies:
	 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
	 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
	 *
	 * Such atoms could be blocked from running if one of the coherent groups
	 * is being used by another job slot, so tracking this context attribute
	 * allows us to prevent such situations.
	 *
	 * @note This doesn't take into account the 1-coregroup case, where all
	 * compute atoms would effectively be able to run on 'all cores', but
	 * contexts will still not always get marked with this attribute. Instead,
	 * it is the caller's responsibility to take into account the number of
	 * coregroups when interpreting this attribute.
	 *
	 * @note Whilst Tiler atoms are normally combined with
	 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
	 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
	 * enough to handle anyway.
	 */
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,

	/** Must be the last in the enum */
	KBASEP_JS_CTX_ATTR_COUNT
};
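/*
 * Illustrative sketch (not part of the driver): the two-level refcounting
 * scheme described above. Adding an atom that carries attribute 'attr' bumps
 * the context-level count; the runpool-level count only changes on the
 * context's 0 <-> 1 transition, and only while the context is scheduled in.
 * The struct members referenced here are defined later in this file; the
 * function itself and the ctx_is_scheduled parameter are hypothetical, and the
 * caller is assumed to hold whatever locks those members require.
 *
 *	static void example_ctx_attr_retain(struct kbasep_js_device_data *js_devdata,
 *					    struct kbasep_js_kctx_info *js_kctx_info,
 *					    bool ctx_is_scheduled,
 *					    enum kbasep_js_ctx_attr attr)
 *	{
 *		// Context level: count atoms holding this attribute.
 *		if (js_kctx_info->ctx.ctx_attr_ref_count[attr]++ == 0 &&
 *		    ctx_is_scheduled) {
 *			// Runpool level: count scheduled contexts holding it.
 *			js_devdata->runpool_irq.ctx_attr_ref_count[attr]++;
 *		}
 *	}
 */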
enum {
	/** Bit indicating that a new atom should be started because this atom completed */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/** Bit indicating that the atom was evicted from the JS_NEXT registers */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};

/** Combination of KBASE_JS_ATOM_DONE_<...> bits */
typedef u32 kbasep_js_atom_done_code;

/**
 * @brief KBase Device Data Job Scheduler sub-structure
 *
 * This encapsulates the current context of the Job Scheduler on a particular
 * device. This context is global to the device, and is not tied to any
 * particular struct kbase_context running on the device.
 *
 * nr_contexts_running and as_free are optimized for packing together (by making
 * them smaller types than u32). The operations on them should rarely involve
 * masking. The use of signed types for arithmetic indicates to the compiler that
 * the value will not roll over (which would be undefined behavior), and so under
 * the Total License model, it is free to make optimizations based on that (i.e.
 * to remove masking).
 */
struct kbasep_js_device_data {
	/* Sub-structure to collect together Job Scheduling data used in IRQ
	 * context. The hwaccess_lock must be held when accessing. */
	struct runpool_irq {
		/** Bitvector indicating whether a currently scheduled context is allowed to submit jobs.
		 * When bit 'N' is set in this, it indicates whether the context bound to address space
		 * 'N' is allowed to submit jobs.
		 */
		u16 submit_allowed;

		/** Context Attributes:
		 * Each is large enough to hold a refcount of the number of contexts
		 * that can fit into the runpool. This is currently BASE_MAX_NR_AS.
		 *
		 * Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
		 * the refcount. Hence, it's not worthwhile reducing this to
		 * bit-manipulation on u32s to save space (where in contrast, 4 bit
		 * sub-fields would be easy to do and would save space).
		 *
		 * Whilst this must not become negative, the sign bit is used for:
		 * - error detection in debug builds
		 * - Optimization: it is undefined for a signed int to overflow, and so
		 *   the compiler can optimize for that never happening (thus, no masking
		 *   is required on updating the variable) */
		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];

		/*
		 * Affinity management and tracking
		 */
		/** Bitvector to aid affinity checking. Element 'n' bit 'i' indicates
		 * that slot 'n' is using core i (i.e. slot_affinity_refcount[n][i] > 0) */
		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
		/** Refcount for each core owned by each slot. Used to generate the
		 * slot_affinities array of bitvectors.
		 *
		 * The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
		 * because it is refcounted only when a job is definitely about to be
		 * submitted to a slot, and is de-refcounted immediately after a job
		 * finishes */
		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
	} runpool_irq;

	/**
	 * Run Pool mutex, for managing contexts within the runpool.
	 * Unless otherwise specified, you must hold this lock whilst accessing any
	 * members that follow.
	 *
	 * In addition, this is used to access:
	 * - the kbasep_js_kctx_info::runpool substructure
	 */
	struct mutex runpool_mutex;

	/**
	 * Queue Lock, used to access the Policy's queue of contexts independently
	 * of the Run Pool.
	 *
	 * Of course, you don't need the Run Pool lock to access this.
	 */
	struct mutex queue_mutex;

	/**
	 * Scheduling semaphore. This must be held when calling
	 * kbase_jm_kick()
	 */
	struct semaphore schedule_sem;

	/**
	 * List of contexts that can currently be pulled from
	 */
	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS];
	/**
	 * List of contexts that cannot currently be pulled from, but have
	 * jobs currently running.
	 */
	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS];

	/** Number of currently scheduled user contexts (excluding ones that are not submitting jobs) */
	s8 nr_user_contexts_running;
	/** Number of currently scheduled contexts (including ones that are not submitting jobs) */
	s8 nr_all_contexts_running;

	/** Core Requirements to match up with base_jd_atom's core_req member
	 * @note This is a write-once member, and so no locking is required to read */
	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];

	u32 scheduling_period_ns;    /**< Value for JS_SCHEDULING_PERIOD_NS */
	u32 soft_stop_ticks;         /**< Value for JS_SOFT_STOP_TICKS */
	u32 soft_stop_ticks_cl;      /**< Value for JS_SOFT_STOP_TICKS_CL */
	u32 hard_stop_ticks_ss;      /**< Value for JS_HARD_STOP_TICKS_SS */
	u32 hard_stop_ticks_cl;      /**< Value for JS_HARD_STOP_TICKS_CL */
	u32 hard_stop_ticks_dumping; /**< Value for JS_HARD_STOP_TICKS_DUMPING */
	u32 gpu_reset_ticks_ss;      /**< Value for JS_RESET_TICKS_SS */
	u32 gpu_reset_ticks_cl;      /**< Value for JS_RESET_TICKS_CL */
	u32 gpu_reset_ticks_dumping; /**< Value for JS_RESET_TICKS_DUMPING */
	u32 ctx_timeslice_ns;        /**< Value for JS_CTX_TIMESLICE_NS */

	/** Value for JS_SOFT_JOB_TIMEOUT */
	atomic_t soft_job_timeout_ms;

	/** List of suspended soft jobs */
	struct list_head suspended_soft_jobs_list;

#ifdef CONFIG_MALI_DEBUG
	/* Support soft-stop on a single context */
	bool softstop_always;
#endif				/* CONFIG_MALI_DEBUG */

	/** The initialized-flag is placed at the end, to avoid cache-pollution (we should
	 * only be using this during init/term paths).
	 * @note This is a write-once member, and so no locking is required to read */
	int init_status;

	/* Number of contexts that can currently be pulled from */
	u32 nr_contexts_pullable;

	/* Number of contexts that can either be pulled from or are currently
	 * running */
	atomic_t nr_contexts_runnable;
};
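/*
 * Illustrative sketch (not part of the driver): reading and clearing a
 * context's submit permission via runpool_irq.submit_allowed above. 'as_nr'
 * is the address space number the context is currently bound to, and the
 * caller is assumed to hold hwaccess_lock as required for runpool_irq. Both
 * functions are hypothetical stand-ins for the driver's own helpers.
 *
 *	static bool example_is_submit_allowed(struct kbasep_js_device_data *js_devdata,
 *					      int as_nr)
 *	{
 *		// Bit 'N' set => the context bound to address space 'N' may submit.
 *		return (js_devdata->runpool_irq.submit_allowed >> as_nr) & 1u;
 *	}
 *
 *	static void example_clear_submit_allowed(struct kbasep_js_device_data *js_devdata,
 *						 int as_nr)
 *	{
 *		js_devdata->runpool_irq.submit_allowed &= ~(1u << as_nr);
 *	}
 */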
/**
 * @brief KBase Context Job Scheduling information structure
 *
 * This is a substructure in the struct kbase_context that encapsulates all the
 * scheduling information.
 */
struct kbasep_js_kctx_info {

	/**
	 * Job Scheduler Context information sub-structure. These members are
	 * accessed regardless of whether the context is:
	 * - In the Policy's Run Pool
	 * - In the Policy's Queue
	 * - Not queued nor in the Run Pool.
	 *
	 * You must obtain the jsctx_mutex before accessing any other members of
	 * this substructure.
	 *
	 * You may not access any of these members from IRQ context.
	 */
	struct kbase_jsctx {
		struct mutex jsctx_mutex;	/**< Job Scheduler Context lock */

		/** Number of jobs <b>ready to run</b> - does \em not include the jobs waiting in
		 * the dispatcher, nor dependency-only jobs. See kbase_jd_context::job_nr
		 * for such jobs */
		u32 nr_jobs;

		/** Context Attributes:
		 * Each is large enough to hold a refcount of the number of atoms on
		 * the context. */
		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];

		/**
		 * Wait queue to wait for KCTX_SCHEDULED flag state changes.
		 */
		wait_queue_head_t is_scheduled_wait;

		/** Link implementing JS queues. Context can be present on one
		 * list per job slot
		 */
		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
	} ctx;

	/* The initialized-flag is placed at the end, to avoid cache-pollution (we should
	 * only be using this during init/term paths) */
	int init_status;
};
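/*
 * Illustrative sketch (not part of the driver): how a blocking path might use
 * ctx.is_scheduled_wait above to sleep until a context's scheduled state
 * changes. example_ctx_is_scheduled() is a hypothetical predicate standing in
 * for a check of the context's KCTX_SCHEDULED flag; whichever path changes
 * that flag is expected to wake the queue.
 *
 *	static void example_wait_until_descheduled(struct kbase_context *kctx,
 *						   struct kbasep_js_kctx_info *js_kctx_info)
 *	{
 *		wait_event(js_kctx_info->ctx.is_scheduled_wait,
 *			   !example_ctx_is_scheduled(kctx));
 *	}
 */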
/** Subset of atom state that can be available after jd_done_nolock() is called
 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 * because the original atom could disappear. */
struct kbasep_js_atom_retained_state {
	/** Event code - to determine whether the atom has finished */
	enum base_jd_event_code event_code;
	/** Core requirements */
	base_jd_core_req core_req;
	/** Priority */
	int sched_priority;
	/** Job Slot to retry submitting to if submission from IRQ handler failed */
	int retry_submit_on_slot;
	/** Core group the atom was executed on */
	u32 device_nr;
};

/**
 * Value signifying 'no retry on a slot required' for:
 * - kbase_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/**
 * base_jd_core_req value signifying 'invalid' for a kbasep_js_atom_retained_state.
 *
 * @see kbase_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/**
 * @brief The JS timer resolution, in microseconds
 *
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1

/*
 * Internal atom priority defines for kbase_jd_atom::sched_prio
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_HIGH = 0,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};

/* Invalid priority for kbase_jd_atom::sched_prio */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1

/* Default priority in the case of contexts with no atoms, or being lenient
 * about invalid priorities from userspace */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED

/** @} *//* end group kbase_js */
/** @} *//* end group base_kbase_api */
/** @} *//* end group base_api */

#endif				/* _KBASE_JS_DEFS_H_ */