xref: /OK3568_Linux_fs/kernel/drivers/gpu/arm/bifrost/mali_kbase_reset_gpu.h (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2 /*
3  *
4  * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
5  *
6  * This program is free software and is provided to you under the terms of the
7  * GNU General Public License version 2 as published by the Free Software
8  * Foundation, and any use by you of this program is subject to the terms
9  * of such GNU license.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, you can access it online at
18  * http://www.gnu.org/licenses/gpl-2.0.html.
19  *
20  */
21 
22 #ifndef _KBASE_RESET_GPU_H_
23 #define _KBASE_RESET_GPU_H_
24 
25 /**
26  * kbase_reset_gpu_prevent_and_wait - Prevent GPU resets from starting whilst
27  *                                    the current thread is accessing the GPU,
28  *                                    and wait for any in-flight reset to
29  *                                    finish.
30  * @kbdev: Device pointer
31  *
32  * This should be used when a potential access to the HW is going to be made
33  * from a non-atomic context.
34  *
35  * It will wait for any in-flight reset to finish before returning. Hence,
36  * correct lock ordering must be observed with respect to the calling thread
37  * and the reset worker thread.
38  *
39  * This does not synchronize general access to the HW, and so multiple threads
40  * can prevent GPU reset concurrently, whilst not being serialized. This is
41  * advantageous as the threads can make this call at points where they do not
42  * know for sure yet whether they will indeed access the GPU (for example, to
43  * respect lock ordering), without unnecessarily blocking others.
44  *
45  * Threads must still use other synchronization to ensure they access the HW
46  * consistently, at a point where they are certain it needs to be accessed.
47  *
48  * On success, the calling thread must call kbase_reset_gpu_allow() once it
49  * has finished accessing the GPU, so that GPU resets are allowed to happen
50  * again.
51  *
52  * This may return a failure in cases such as a previous failure to reset the
53  * GPU within a reasonable time. If that happens, the GPU might be
54  * non-operational and the caller should not attempt any further access.
55  *
56  * Note:
57  * For atomic context, instead check kbase_reset_gpu_is_active().
58  *
59  * Return: 0 on success, or negative error code on failure.
60  */
61 int kbase_reset_gpu_prevent_and_wait(struct kbase_device *kbdev);
62 
63 /**
64  * kbase_reset_gpu_try_prevent - Attempt to prevent GPU resets from starting
65  *                               whilst the current thread is accessing the
66  *                               GPU, unless a reset is already in progress.
67  * @kbdev: Device pointer
68  *
69  * Similar to kbase_reset_gpu_prevent_and_wait(), but it does not wait for an
70  * existing reset to complete. This can be used on codepaths that the reset
71  * worker waits on, where use of kbase_reset_gpu_prevent_and_wait() would
72  * otherwise deadlock.
73  *
74  * Instead, a reset that is currently happening will cause this function to
75  * return an error code indicating that, and further resets will not have been
76  * prevented.
77  *
78  * In such cases, the caller must check for -EAGAIN, and take similar actions
79  * as for handling reset in atomic context. That is, they must cancel any
80  * actions that depended on reset being prevented, possibly deferring them
81  * until after the reset.
82  *
83  * Otherwise a successful return means that the caller can continue its actions
84  * safely in the knowledge that reset is prevented, and the reset worker will
85  * correctly wait instead of deadlocking against this thread.
86  *
87  * On success, the calling thread must call kbase_reset_gpu_allow() once it
88  * has finished accessing the GPU, so that GPU resets are allowed to happen
89  * again.
90  *
91  * Refer to kbase_reset_gpu_prevent_and_wait() for more information.
92  *
93  * Return: 0 on success. -EAGAIN if a reset is currently happening. Other
94  * negative error codes on failure, where -ENOMEM indicates that GPU reset
95  * had failed.
96  */
97 int kbase_reset_gpu_try_prevent(struct kbase_device *kbdev);
98 
99 /**
100  * kbase_reset_gpu_allow - Allow GPU resets to happen again after having been
101  *                         previously prevented.
102  * @kbdev: Device pointer
103  *
104  * This should be used when a potential access to the HW has finished from a
105  * non-atomic context.
106  *
107  * It must be called from the same thread that previously made the successful
108  * call to kbase_reset_gpu_prevent_and_wait() or kbase_reset_gpu_try_prevent().
109  * It must not be deferred to another thread.
110  */
111 void kbase_reset_gpu_allow(struct kbase_device *kbdev);
112 
113 /**
114  * kbase_reset_gpu_assert_prevented - Make debugging checks that GPU reset is
115  *                                    currently prevented by the current
116  *                                    thread.
117  * @kbdev: Device pointer
118  *
119  * Make debugging checks that the current thread has made a call to
120  * kbase_reset_gpu_prevent_and_wait(), but has yet to make a subsequent call to
121  * kbase_reset_gpu_allow().
122  *
123  * CONFIG_LOCKDEP is required to prove that reset is indeed
124  * prevented. Without it, only limited debugging checks can be made.
125  */
126 void kbase_reset_gpu_assert_prevented(struct kbase_device *kbdev);
127 
128 /**
129  * kbase_reset_gpu_assert_failed_or_prevented - Make debugging checks that
130  *                                              either GPU reset previously
131  *                                              failed, or is currently
132  *                                              prevented.
133  *
134  * @kbdev: Device pointer
135  *
136  * As with kbase_reset_gpu_assert_prevented(), but also allow for paths where
137  * reset was not prevented due to a failure, yet the cleanup code that follows
138  * still needs to be executed.
139  *
140  * Cleanup code following this call must handle any inconsistent state modified
141  * by the failed GPU reset, and must time out any blocking operations instead
142  * of waiting forever.
143  */
144 void kbase_reset_gpu_assert_failed_or_prevented(struct kbase_device *kbdev);
145 
146 /**
147  * RESET_FLAGS_NONE - Empty @flags value (no bits set) for kbase_prepare_to_reset_gpu()
148  */
149 #define RESET_FLAGS_NONE (0U)
150 
151 /* Flag: HW counter logic should treat this reset as an unrecoverable error */
152 #define RESET_FLAGS_HWC_UNRECOVERABLE_ERROR ((unsigned int)(1 << 0))
153 
154 /**
155  * kbase_prepare_to_reset_gpu_locked - Prepare for resetting the GPU.
156  * @kbdev: Device pointer
157  * @flags: Bitfield indicating impact of reset (see RESET_FLAGS_* defines)
158  *
159  * Caller is expected to hold the kbdev->hwaccess_lock.
160  *
161  * Return: a boolean which should be interpreted as follows:
162  * - true  - Prepared for reset, kbase_reset_gpu() should be called.
163  * - false - Another thread is performing a reset, kbase_reset_gpu() should
164  *           not be called.
165  */
166 bool kbase_prepare_to_reset_gpu_locked(struct kbase_device *kbdev,
167 				       unsigned int flags);
168 
169 /**
170  * kbase_prepare_to_reset_gpu - Prepare for resetting the GPU.
171  * @kbdev: Device pointer
172  * @flags: Bitfield indicating impact of reset (see RESET_FLAGS_* defines)
173  *
174  * Return: a boolean which should be interpreted as follows:
175  * - true  - Prepared for reset, kbase_reset_gpu() should be called.
176  * - false - Another thread is performing a reset, kbase_reset_gpu() should
177  *           not be called.
178  */
179 bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags);
180 
181 /**
182  * kbase_reset_gpu - Reset the GPU
183  * @kbdev: Device pointer
184  *
185  * This function should be called after kbase_prepare_to_reset_gpu() if it
186  * returns true. It should never be called without a corresponding call to
187  * kbase_prepare_to_reset_gpu() (only on Job Manager GPUs).
188  *
189  * After this function is called, the caller should call kbase_reset_gpu_wait()
190  * to know when the reset has completed.
191  */
192 void kbase_reset_gpu(struct kbase_device *kbdev);
193 
194 /**
195  * kbase_reset_gpu_locked - Reset the GPU, with kbdev->hwaccess_lock held
196  * @kbdev: Device pointer
197  *
198  * This function should be called after kbase_prepare_to_reset_gpu_locked() if
199  * it returns true. It should never be called without a corresponding call to
200  * kbase_prepare_to_reset_gpu_locked() (only on Job Manager GPUs).
201  * Caller is expected to hold the kbdev->hwaccess_lock.
202  *
203  * After this function is called, the caller should call kbase_reset_gpu_wait()
204  * to know when the reset has completed.
205  */
206 void kbase_reset_gpu_locked(struct kbase_device *kbdev);
207 
208 /**
209  * kbase_reset_gpu_silent - Reset the GPU silently
210  * @kbdev: Device pointer
211  *
212  * Reset the GPU without trying to cancel jobs (applicable to Job Manager GPUs)
213  * and without emitting messages into the kernel log while doing the reset.
214  *
215  * This function should be used in cases where we are doing a controlled reset
216  * of the GPU as part of normal processing (e.g. exiting protected mode) where
217  * the driver will have ensured the scheduler has been idled and all other
218  * users of the GPU (e.g. instrumentation) have been suspended.
219  *
220  * Return: 0 if the reset was started successfully, or
221  *         -EAGAIN if another reset is currently in progress.
222  */
223 int kbase_reset_gpu_silent(struct kbase_device *kbdev);
224 
225 /**
226  * kbase_reset_gpu_is_active - Reports if the GPU is being reset
227  * @kbdev: Device pointer
228  *
229  * Any changes made to the HW when this returns true may be lost, overwritten
230  * or corrupted.
231  *
232  * Note that unless appropriate locks are held when using this function, the
233  * state could change immediately afterwards.
234  *
235  * Return: true if the GPU is in the process of being reset, false otherwise.
236  */
237 bool kbase_reset_gpu_is_active(struct kbase_device *kbdev);
238 
239 /**
240  * kbase_reset_gpu_is_not_pending - Reports if the GPU reset isn't pending
241  *
242  * @kbdev: Device pointer
243  *
244  * Note that unless appropriate locks are held when using this function, the
245  * state could change immediately afterwards.
246  *
247  * Return: true if the GPU reset isn't pending, false otherwise.
248  */
249 bool kbase_reset_gpu_is_not_pending(struct kbase_device *kbdev);
250 
251 /**
252  * kbase_reset_gpu_wait - Wait for a GPU reset to complete
253  * @kbdev: Device pointer
254  *
255  * This function may wait indefinitely.
256  *
257  * Return: 0 if successful, or a negative error code on failure.
258  */
259 int kbase_reset_gpu_wait(struct kbase_device *kbdev);
260 
261 /**
262  * kbase_reset_gpu_init - Initialize the GPU reset handling mechanism.
263  *
264  * @kbdev: Device pointer
265  *
266  * Return: 0 if successful, or a negative error code on failure.
267  */
268 int kbase_reset_gpu_init(struct kbase_device *kbdev);
269 
270 /**
271  * kbase_reset_gpu_term - Tear down the GPU reset handling mechanism.
272  *
273  * @kbdev: Device pointer
274  */
275 void kbase_reset_gpu_term(struct kbase_device *kbdev);
276 
277 #endif
278