/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
 *
 * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

#ifndef _KBASE_RESET_GPU_H_
#define _KBASE_RESET_GPU_H_

/*
 * Forward declaration: every prototype below only needs a pointer to the
 * device structure. Declaring the tag here keeps all prototypes referring to
 * the same type even when the full definition has not been seen yet.
 *
 * NOTE(review): this header also uses bool but includes nothing itself, so it
 * relies on the includer having pulled in a definition beforehand - consider
 * including <linux/types.h> here.
 */
struct kbase_device;

/**
 * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst
 *                                    the current thread is accessing the GPU,
 *                                    and wait for any in-flight reset to
 *                                    finish.
 * @kbdev: Device pointer
 *
 * This should be used when a potential access to the HW is going to be made
 * from a non-atomic context.
 *
 * It will wait for any in-flight reset to finish before returning. Hence,
 * correct lock ordering must be observed with respect to the calling thread
 * and the reset worker thread.
 *
 * This does not synchronize general access to the HW, and so multiple threads
 * can prevent GPU reset concurrently, whilst not being serialized. This is
 * advantageous as the threads can make this call at points where they do not
 * know for sure yet whether they will indeed access the GPU (for example, to
 * respect lock ordering), without unnecessarily blocking others.
 *
 * Threads must still use other synchronization to ensure they access the HW
 * consistently, at a point where they are certain it needs to be accessed.
 *
 * On success, ensure that when access to the GPU by the caller thread has
 * finished, that it calls kbase_reset_gpu_allow() again to allow resets to
 * happen.
 *
 * This may return a failure in cases such as a previous failure to reset the
 * GPU within a reasonable time. If that happens, the GPU might be
 * non-operational and the caller should not attempt any further access.
 *
 * Note:
 * For atomic context, instead check kbase_reset_gpu_is_active().
 *
 * Return: 0 on success, or negative error code on failure.
 */
int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting
 *                               whilst the current thread is accessing the
 *                               GPU, unless a reset is already in progress.
 * @kbdev: Device pointer
 *
 * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an
 * existing reset to complete. This can be used on codepaths that the Reset
 * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would
 * otherwise deadlock.
 *
 * Instead, a reset that is currently happening will cause this function to
 * return an error code indicating that, and further resets will not have been
 * prevented.
 *
 * In such cases, the caller must check for -EAGAIN, and take similar actions
 * as for handling reset in atomic context. That is, they must cancel any
 * actions that depended on reset being prevented, possibly deferring them
 * until after the reset.
 *
 * Otherwise a successful return means that the caller can continue its actions
 * safely in the knowledge that reset is prevented, and the reset worker will
 * correctly wait instead of deadlocking against this thread.
 *
 * On success, ensure that when access to the GPU by the caller thread has
 * finished, that it calls kbase_reset_gpu_allow() again to allow resets to
 * happen.
 *
 * Refer to kbase_reset_gpu_prevent_and_wait() for more information.
 *
 * Return: 0 on success. -EAGAIN if a reset is currently happening. Other
 * negative error codes on failure, where -ENOMEM indicates that GPU reset
 * had failed.
 */
int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been
 *                         previously prevented.
 * @kbdev: Device pointer
 *
 * This should be used when a potential access to the HW has finished from a
 * non-atomic context.
 *
 * It must be used from the same thread that originally made a previous
 * successful call to kbase_reset_gpu_prevent_and_wait() or
 * kbase_reset_gpu_try_prevent(). It must not be deferred to another thread.
 */
void kbase_reset_gpu_allow(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is
 *                                    currently prevented by the current
 *                                    thread.
 * @kbdev: Device pointer
 *
 * Make debugging checks that the current thread has made a call to
 * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to
 * kbase_reset_gpu_allow().
 *
 * CONFIG_LOCKDEP is required to prove that reset is indeed
 * prevented. Otherwise only limited debugging checks can be made.
 */
void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that
 *                                              either GPU reset previously
 *                                              failed, or is currently
 *                                              prevented.
 *
 * @kbdev: Device pointer
 *
 * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where
 * reset was not prevented due to a failure, yet we still need to execute the
 * cleanup code following.
 *
 * Cleanup code following this call must handle any inconsistent state modified
 * by the failed GPU reset, and must timeout any blocking operations instead of
 * waiting forever.
 */
void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);

/**
 * RESET_FLAGS_NONE - Flags for kbase_prepare_to_reset_gpu
 */
#define RESET_FLAGS_NONE (0U)

/**
 * RESET_FLAGS_HWC_UNRECOVERABLE_ERROR - This reset should be treated as an
 *                                       unrecoverable error by HW counter
 *                                       logic
 *
 * Written with an unsigned literal so the flag has type unsigned int, matching
 * the @flags parameter of kbase_prepare_to_reset_gpu(), without shifting a
 * signed value first.
 */
#define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR (1U << 0)

/**
 * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
 * @kbdev: Device pointer
 * @flags: Bitfield indicating impact of reset (see flag defines)
 *
 * Caller is expected to hold the kbdev->hwaccess_lock.
 *
 * Return: a boolean which should be interpreted as follows:
 * - true  - Prepared for reset, kbase_reset_gpu should be called.
 * - false - Another thread is performing a reset, kbase_reset_gpu should
 *           not be called.
 */
bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
				       unsigned int flags);

/**
 * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
 * @kbdev: Device pointer
 * @flags: Bitfield indicating impact of reset (see flag defines)
 *
 * Return: a boolean which should be interpreted as follows:
 * - true  - Prepared for reset, kbase_reset_gpu should be called.
 * - false - Another thread is performing a reset, kbase_reset_gpu should
 *           not be called.
 */
bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags);

/**
 * kbase_reset_gpu - Reset the GPU
 * @kbdev: Device pointer
 *
 * This function should be called after kbase_prepare_to_reset_gpu if it returns
 * true. It should never be called without a corresponding call to
 * kbase_prepare_to_reset_gpu (only on Job Manager GPUs).
 *
 * After this function is called the caller should call kbase_reset_gpu_wait()
 * to know when the reset has completed.
 */
void kbase_reset_gpu(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_locked - Reset the GPU
 * @kbdev: Device pointer
 *
 * This function should be called after kbase_prepare_to_reset_gpu_locked if it
 * returns true. It should never be called without a corresponding call to
 * kbase_prepare_to_reset_gpu_locked (only on Job Manager GPUs).
 * Caller is expected to hold the kbdev->hwaccess_lock.
 *
 * After this function is called, the caller should call kbase_reset_gpu_wait()
 * to know when the reset has completed.
 */
void kbase_reset_gpu_locked(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_silent - Reset the GPU silently
 * @kbdev: Device pointer
 *
 * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs)
 * and don't emit messages into the kernel log while doing the reset.
 *
 * This function should be used in cases where we are doing a controlled reset
 * of the GPU as part of normal processing (e.g. exiting protected mode) where
 * the driver will have ensured the scheduler has been idled and all other
 * users of the GPU (e.g. instrumentation) have been suspended.
 *
 * Return: 0 if the reset was started successfully
 *         -EAGAIN if another reset is currently in progress
 */
int kbase_reset_gpu_silent(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_is_active - Reports if the GPU is being reset
 * @kbdev: Device pointer
 *
 * Any changes made to the HW when this returns true may be lost, overwritten
 * or corrupted.
 *
 * Note that unless appropriate locks are held when using this function, the
 * state could change immediately afterwards.
 *
 * Return: True if the GPU is in the process of being reset.
 */
bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_is_not_pending - Reports if the GPU reset isn't pending
 *
 * @kbdev: Device pointer
 *
 * Note that unless appropriate locks are held when using this function, the
 * state could change immediately afterwards.
 *
 * Return: True if the GPU reset isn't pending.
 */
bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_wait - Wait for a GPU reset to complete
 * @kbdev: Device pointer
 *
 * This function may wait indefinitely.
 *
 * Return: 0 if successful or a negative error code on failure.
 */
int kbase_reset_gpu_wait(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism.
 *
 * @kbdev: Device pointer
 *
 * Return: 0 if successful or a negative error code on failure.
 */
int kbase_reset_gpu_init(struct kbase_device *kbdev);

/**
 * kbase_reset_gpu_term - Terminate the GPU reset handling mechanism.
 *
 * @kbdev: Device pointer
 */
void kbase_reset_gpu_term(struct kbase_device *kbdev);

#endif /* _KBASE_RESET_GPU_H_ */