/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/**
 * DOC: Job Scheduler Type Definitions
 */

#ifndef _KBASE_JS_DEFS_H_
#define _KBASE_JS_DEFS_H_

/* Forward decls */
struct kbase_device;
struct kbase_jd_atom;

/* Integral type holding a bitmask of context flags. */
typedef u32 kbase_context_flags;

/*
 * typedef kbasep_js_ctx_job_cb - Callback function run on all of a context's
 * jobs registered with the Job Scheduler
 */
typedef void kbasep_js_ctx_job_cb(struct kbase_device *kbdev,
				  struct kbase_jd_atom *katom);

/*
 * @brief Maximum number of jobs that can be submitted to a job slot whilst
 * inside the IRQ handler.
 *
 * This is important because GPU NULL jobs can complete whilst the IRQ handler
 * is running. Otherwise, it potentially allows an unlimited number of GPU NULL
 * jobs to be submitted inside the IRQ handler, which increases IRQ latency.
 */
#define KBASE_JS_MAX_JOB_SUBMIT_PER_SLOT_PER_IRQ 2

/**
 * enum kbasep_js_ctx_attr - Context attributes
 * @KBASEP_JS_CTX_ATTR_COMPUTE: Attribute indicating a context that contains
 *                              Compute jobs.
 * @KBASEP_JS_CTX_ATTR_NON_COMPUTE: Attribute indicating a context that contains
 *                                  Non-Compute jobs.
 * @KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES: Attribute indicating that a context
 *                                        contains compute-job atoms that aren't
 *                                        restricted to a coherent group,
 *                                        and can run on all cores.
 * @KBASEP_JS_CTX_ATTR_COUNT: Must be the last in the enum
 *
 * Each context attribute can be thought of as a boolean value that caches some
 * state information about either the runpool, or the context:
 * - In the case of the runpool, it is a cache of "Do any contexts owned by
 * the runpool have attribute X?"
 * - In the case of a context, it is a cache of "Do any atoms owned by the
 * context have attribute X?"
 *
 * The boolean value of the context attributes often affect scheduling
 * decisions, such as affinities to use and job slots to use.
 *
 * To accommodate changes of state in the context, each attribute is refcounted
 * in the context, and in the runpool for all running contexts. Specifically:
 * - The runpool holds a refcount of how many contexts in the runpool have this
 * attribute.
 * - The context holds a refcount of how many atoms have this attribute.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE:
 * Attribute indicating a context that contains Compute jobs. That is,
 * the context has jobs of type @ref BASE_JD_REQ_ONLY_COMPUTE
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_NON_COMPUTE:
 * Attribute indicating a context that contains Non-Compute jobs. That is,
 * the context has some jobs that are \b not of type @ref
 * BASE_JD_REQ_ONLY_COMPUTE.
 *
 * @note A context can be both 'Compute' and 'Non Compute' if it contains
 * both types of jobs.
 *
 * KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES:
 * Attribute indicating that a context contains compute-job atoms that
 * aren't restricted to a coherent group, and can run on all cores.
 *
 * Specifically, this is when the atom's \a core_req satisfy:
 * - (\a core_req & (BASE_JD_REQ_CS | BASE_JD_REQ_ONLY_COMPUTE | BASE_JD_REQ_T)) // uses slot 1 or slot 2
 * - && !(\a core_req & BASE_JD_REQ_COHERENT_GROUP) // not restricted to coherent groups
 *
 * Such atoms could be blocked from running if one of the coherent groups
 * is being used by another job slot, so tracking this context attribute
 * allows us to prevent such situations.
 *
 * @note This doesn't take into account the 1-coregroup case, where all
 * compute atoms would effectively be able to run on 'all cores', but
 * contexts will still not always get marked with this attribute. Instead,
 * it is the caller's responsibility to take into account the number of
 * coregroups when interpreting this attribute.
 *
 * @note Whilst Tiler atoms are normally combined with
 * BASE_JD_REQ_COHERENT_GROUP, it is possible to send such atoms without
 * BASE_JD_REQ_COHERENT_GROUP set. This is an unlikely case, but it's easy
 * enough to handle anyway.
 *
 */
enum kbasep_js_ctx_attr {
	KBASEP_JS_CTX_ATTR_COMPUTE,
	KBASEP_JS_CTX_ATTR_NON_COMPUTE,
	KBASEP_JS_CTX_ATTR_COMPUTE_ALL_CORES,
	KBASEP_JS_CTX_ATTR_COUNT
};

/* Flags returned on atom completion, combined into kbasep_js_atom_done_code */
enum {
	/*
	 * Bit indicating that new atom should be started because this atom
	 * completed
	 */
	KBASE_JS_ATOM_DONE_START_NEW_ATOMS = (1u << 0),
	/*
	 * Bit indicating that the atom was evicted from the JS_NEXT registers
	 */
	KBASE_JS_ATOM_DONE_EVICTED_FROM_NEXT = (1u << 1)
};

/**
 * typedef kbasep_js_atom_done_code - Combination of KBASE_JS_ATOM_DONE_<...>
 * bits
 */
typedef u32 kbasep_js_atom_done_code;

/*
 * Context scheduling mode defines for kbase_device::js_ctx_scheduling_mode
 */
enum {
	/*
	 * In this mode, higher priority atoms will be scheduled first,
	 * regardless of the context they belong to. Newly-runnable higher
	 * priority atoms can preempt lower priority atoms currently running on
	 * the GPU, even if they belong to a different context.
	 */
	KBASE_JS_SYSTEM_PRIORITY_MODE = 0,

	/*
	 * In this mode, the highest-priority atom will be chosen from each
	 * context in turn using a round-robin algorithm, so priority only has
	 * an effect within the context an atom belongs to. Newly-runnable
	 * higher priority atoms can preempt the lower priority atoms currently
	 * running on the GPU, but only if they belong to the same context.
	 */
	KBASE_JS_PROCESS_LOCAL_PRIORITY_MODE,

	/* Must be the last in the enum */
	KBASE_JS_PRIORITY_MODE_COUNT,
};

/*
 * Internal atom priority defines for kbase_jd_atom::sched_prio
 * (lower numeric value == higher scheduling priority)
 */
enum {
	KBASE_JS_ATOM_SCHED_PRIO_FIRST = 0,
	KBASE_JS_ATOM_SCHED_PRIO_REALTIME = KBASE_JS_ATOM_SCHED_PRIO_FIRST,
	KBASE_JS_ATOM_SCHED_PRIO_HIGH,
	KBASE_JS_ATOM_SCHED_PRIO_MED,
	KBASE_JS_ATOM_SCHED_PRIO_LOW,
	KBASE_JS_ATOM_SCHED_PRIO_COUNT,
};

/* Invalid priority for kbase_jd_atom::sched_prio */
#define KBASE_JS_ATOM_SCHED_PRIO_INVALID -1

/* Default priority in the case of contexts with no atoms, or being lenient
 * about invalid priorities from userspace.
 */
#define KBASE_JS_ATOM_SCHED_PRIO_DEFAULT KBASE_JS_ATOM_SCHED_PRIO_MED

/* Atom priority bitmaps, where bit 0 is the highest priority, and higher bits
 * indicate successively lower KBASE_JS_ATOM_SCHED_PRIO_<...> levels.
 *
 * Must be strictly larger than the number of bits to represent a bitmap of
 * priorities, so that we can do calculations such as:
 * (1 << KBASE_JS_ATOM_SCHED_PRIO_COUNT) - 1
 * ...without causing undefined behavior due to a shift beyond the width of the
 * type
 *
 * If KBASE_JS_ATOM_SCHED_PRIO_COUNT starts requiring 32 bits, then it's worth
 * moving to DECLARE_BITMAP()
 */
typedef u8 kbase_js_prio_bitmap_t;

/* Ordering modification for kbase_js_atom_runs_before() */
typedef u32 kbase_atom_ordering_flag_t;

/* Atoms of the same context and priority should have their ordering decided by
 * their seq_nr instead of their age.
 *
 * seq_nr is used as a more slowly changing variant of age - it increases once
 * per group of related atoms, as determined by user-space. Hence, it can be
 * used to limit re-ordering decisions (such as pre-emption) to only re-order
 * between such groups, rather than re-order within those groups of atoms.
 */
#define KBASE_ATOM_ORDERING_FLAG_SEQNR (((kbase_atom_ordering_flag_t)1) << 0)

/**
 * struct kbasep_js_device_data - KBase Device Data Job Scheduler sub-structure
 * @runpool_irq: Sub-structure to collect together Job Scheduling data used in
 *               IRQ context. The hwaccess_lock must be held when accessing.
 * @runpool_irq.submit_allowed: Bitvector indicating whether a currently
 *                              scheduled context is allowed to submit jobs.
 *                              When bit 'N' is set in this, it indicates whether
 *                              the context bound to address space 'N' is
 *                              allowed to submit jobs.
 * @runpool_irq.ctx_attr_ref_count: Array of Context Attributes Ref_counters:
 *    Each is large enough to hold a refcount of the number of contexts
 *    that can fit into the runpool. This is currently BASE_MAX_NR_AS.
 *    Note that when BASE_MAX_NR_AS==16 we need 5 bits (not 4) to store
 *    the refcount. Hence, it's not worthwhile reducing this to
 *    bit-manipulation on u32s to save space (where in contrast, 4 bit
 *    sub-fields would be easy to do and would save space).
 *    Whilst this must not become negative, the sign bit is used for:
 *    - error detection in debug builds
 *    - Optimization: it is undefined for a signed int to overflow, and so
 *      the compiler can optimize for that never happening (thus, no masking
 *      is required on updating the variable)
 * @runpool_irq.slot_affinities: Affinity management and tracking. Bitvector
 *                               to aid affinity checking.
 *                               Element 'n' bit 'i' indicates that slot 'n'
 *                               is using core i (i.e. slot_affinity_refcount[n][i] > 0)
 * @runpool_irq.slot_affinity_refcount: Array of refcount for each core owned
 *    by each slot. Used to generate the slot_affinities array of bitvectors.
 *    The value of the refcount will not exceed BASE_JM_SUBMIT_SLOTS,
 *    because it is refcounted only when a job is definitely about to be
 *    submitted to a slot, and is de-refcounted immediately after a job
 *    finishes
 * @schedule_sem: Scheduling semaphore. This must be held when calling
 *                kbase_jm_kick()
 * @ctx_list_pullable: List of contexts that can currently be pulled from
 * @ctx_list_unpullable: List of contexts that can not currently be pulled
 *                       from, but have jobs currently running.
 * @nr_user_contexts_running: Number of currently scheduled user contexts
 *                            (excluding ones that are not submitting jobs)
 * @nr_all_contexts_running: Number of currently scheduled contexts (including
 *                           ones that are not submitting jobs)
 * @js_reqs: Core Requirements to match up with base_js_atom's core_req member
 *    @note This is a write-once member, and so no locking is required to
 *    read
 * @scheduling_period_ns: Value for JS_SCHEDULING_PERIOD_NS
 * @soft_stop_ticks: Value for JS_SOFT_STOP_TICKS
 * @soft_stop_ticks_cl: Value for JS_SOFT_STOP_TICKS_CL
 * @hard_stop_ticks_ss: Value for JS_HARD_STOP_TICKS_SS
 * @hard_stop_ticks_cl: Value for JS_HARD_STOP_TICKS_CL
 * @hard_stop_ticks_dumping: Value for JS_HARD_STOP_TICKS_DUMPING
 * @gpu_reset_ticks_ss: Value for JS_RESET_TICKS_SS
 * @gpu_reset_ticks_cl: Value for JS_RESET_TICKS_CL
 * @gpu_reset_ticks_dumping: Value for JS_RESET_TICKS_DUMPING
 * @ctx_timeslice_ns: Value for JS_CTX_TIMESLICE_NS
 * @suspended_soft_jobs_list: List of suspended soft jobs
 * @softstop_always: Support soft-stop on a single context
 * @init_status: The initialized-flag is placed at the end, to avoid
 *    cache-pollution (we should only be using this during init/term paths).
 *    @note This is a write-once member, and so no locking is required to
 *    read
 * @nr_contexts_pullable: Number of contexts that can currently be pulled from
 * @nr_contexts_runnable: Number of contexts that can either be pulled from or
 *                        are currently running
 * @soft_job_timeout_ms: Value for JS_SOFT_JOB_TIMEOUT
 * @js_free_wait_time_ms: Maximum waiting time in ms for a Job Slot to be seen free.
 * @queue_mutex: Queue Lock, used to access the Policy's queue of contexts
 *               independently of the Run Pool.
 *               Of course, you don't need the Run Pool lock to access this.
 * @runpool_mutex: Run Pool mutex, for managing contexts within the runpool.
 *
 * This encapsulates the current context of the Job Scheduler on a particular
 * device. This context is global to the device, and is not tied to any
 * particular struct kbase_context running on the device.
 *
 * nr_contexts_running and as_free are optimized for packing together (by making
 * them smaller types than u32). The operations on them should rarely involve
 * masking. The use of signed types for arithmetic indicates to the compiler
 * that the value will not rollover (which would be undefined behavior), and so
 * under the Total License model, it is free to make optimizations based on
 * that (i.e. to remove masking).
 */
struct kbasep_js_device_data {
	struct runpool_irq {
		u16 submit_allowed;
		s8 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		u64 slot_affinities[BASE_JM_MAX_NR_SLOTS];
		s8 slot_affinity_refcount[BASE_JM_MAX_NR_SLOTS][64];
	} runpool_irq;
	struct semaphore schedule_sem;
	struct list_head ctx_list_pullable[BASE_JM_MAX_NR_SLOTS]
					  [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	struct list_head ctx_list_unpullable[BASE_JM_MAX_NR_SLOTS]
					    [KBASE_JS_ATOM_SCHED_PRIO_COUNT];
	s8 nr_user_contexts_running;
	s8 nr_all_contexts_running;
	base_jd_core_req js_reqs[BASE_JM_MAX_NR_SLOTS];

	u32 scheduling_period_ns;
	u32 soft_stop_ticks;
	u32 soft_stop_ticks_cl;
	u32 hard_stop_ticks_ss;
	u32 hard_stop_ticks_cl;
	u32 hard_stop_ticks_dumping;
	u32 gpu_reset_ticks_ss;
	u32 gpu_reset_ticks_cl;
	u32 gpu_reset_ticks_dumping;
	u32 ctx_timeslice_ns;

	struct list_head suspended_soft_jobs_list;

#ifdef CONFIG_MALI_BIFROST_DEBUG
	bool softstop_always;
#endif /* CONFIG_MALI_BIFROST_DEBUG */
	int init_status;
	u32 nr_contexts_pullable;
	atomic_t nr_contexts_runnable;
	atomic_t soft_job_timeout_ms;
	u32 js_free_wait_time_ms;

	struct mutex queue_mutex;
	/*
	 * Run Pool mutex, for managing contexts within the runpool.
	 * Unless otherwise specified, you must hold this lock whilst accessing
	 * any members that follow
	 *
	 * In addition, this is used to access:
	 * * the kbasep_js_kctx_info::runpool substructure
	 */
	struct mutex runpool_mutex;
};

/**
 * struct kbasep_js_kctx_info - KBase Context Job Scheduling information
 * structure
 * @ctx: Job Scheduler Context information sub-structure. Its members are
 *    accessed regardless of whether the context is:
 *    - In the Policy's Run Pool
 *    - In the Policy's Queue
 *    - Not queued nor in the Run Pool.
 *    You must obtain the @ctx.jsctx_mutex before accessing any other members
 *    of this substructure.
 *    You may not access any of its members from IRQ context.
 * @ctx.jsctx_mutex: Job Scheduler Context lock
 * @ctx.nr_jobs: Number of jobs <b>ready to run</b> - does \em not include
 *               the jobs waiting in the dispatcher, and dependency-only
 *               jobs. See kbase_jd_context::job_nr for such jobs
 * @ctx.ctx_attr_ref_count: Context Attributes ref count. Each is large enough
 *                          to hold a refcount of the number of atoms on the context.
 * @ctx.is_scheduled_wait: Wait queue to wait for KCTX_SCHEDULED flag state
 *                         changes.
 * @ctx.ctx_list_entry: Link implementing JS queues. Context can be present on
 *                      one list per job slot.
 * @init_status: The initialized-flag is placed at the end, to avoid
 *               cache-pollution (we should only be using this during init/term paths)
 *
 * This is a substructure in the struct kbase_context that encapsulates all the
 * scheduling information.
 */
struct kbasep_js_kctx_info {
	struct kbase_jsctx {
		struct mutex jsctx_mutex;

		u32 nr_jobs;
		u32 ctx_attr_ref_count[KBASEP_JS_CTX_ATTR_COUNT];
		wait_queue_head_t is_scheduled_wait;
		struct list_head ctx_list_entry[BASE_JM_MAX_NR_SLOTS];
	} ctx;
	int init_status;
};

/**
 * struct kbasep_js_atom_retained_state - Subset of atom state.
 * @event_code: to determine whether the atom has finished
 * @core_req: core requirements
 * @sched_priority: priority
 * @device_nr: Core group atom was executed on
 *
 * Subset of atom state that can be available after kbase_jd_done_nolock() is called
 * on that atom. A copy must be taken via kbasep_js_atom_retained_state_copy(),
 * because the original atom could disappear.
 */
struct kbasep_js_atom_retained_state {
	/* Event code - to determine whether the atom has finished */
	enum base_jd_event_code event_code;
	/* core requirements */
	base_jd_core_req core_req;
	/* priority */
	int sched_priority;
	/* Core group atom was executed on */
	u32 device_nr;

};

/*
 * Value signifying 'no retry on a slot required' for:
 * - kbase_js_atom_retained_state::retry_submit_on_slot
 * - kbase_jd_atom::retry_submit_on_slot
 */
#define KBASEP_JS_RETRY_SUBMIT_SLOT_INVALID (-1)

/*
 * base_jd_core_req value signifying 'invalid' for a
 * kbase_jd_atom_retained_state. See kbase_atom_retained_state_is_valid()
 */
#define KBASEP_JS_ATOM_RETAINED_STATE_CORE_REQ_INVALID BASE_JD_REQ_DEP

/*
 * The JS timer resolution, in microseconds
 * Any non-zero difference in time will be at least this size.
 */
#define KBASEP_JS_TICK_RESOLUTION_US 1

/**
 * struct kbase_jsctx_slot_tracking - Job Scheduling tracking of a context's
 * use of a job slot
 * @blocked: bitmap of priorities that this slot is blocked at
 * @atoms_pulled: counts of atoms that have been pulled from this slot,
 *                across all priority levels
 * @atoms_pulled_pri: counts of atoms that have been pulled from this slot, per
 *                    priority level
 *
 * Controls how a slot from the &struct kbase_context's jsctx_queue is managed,
 * for example to ensure correct ordering of atoms when atoms of different
 * priorities are unpulled.
 */
struct kbase_jsctx_slot_tracking {
	kbase_js_prio_bitmap_t blocked;
	atomic_t atoms_pulled;
	int atoms_pulled_pri[KBASE_JS_ATOM_SCHED_PRIO_COUNT];
};

#endif /* _KBASE_JS_DEFS_H_ */