// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);

static void hl_sob_reset(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	hdev->asic_funcs->reset_sob(hdev, hw_sob);
}

void hl_sob_reset_error(struct kref *ref)
{
	struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob,
							kref);
	struct hl_device *hdev = hw_sob->hdev;

	dev_crit(hdev->dev,
		"SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
		hw_sob->q_idx, hw_sob->sob_id);
}

static void hl_fence_release(struct kref *kref)
{
	struct hl_fence *fence =
		container_of(kref, struct hl_fence, refcount);
	struct hl_cs_compl *hl_cs_cmpl =
		container_of(fence, struct hl_cs_compl, base_fence);
	struct hl_device *hdev = hl_cs_cmpl->hdev;

	/* EBUSY means the CS was never submitted and hence we don't have
	 * an attached hw_sob object that we should handle here
	 */
	if (fence->error == -EBUSY)
		goto free;

	if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
			(hl_cs_cmpl->type == CS_TYPE_WAIT)) {

		dev_dbg(hdev->dev,
			"CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
			hl_cs_cmpl->cs_seq,
			hl_cs_cmpl->type,
			hl_cs_cmpl->hw_sob->sob_id,
			hl_cs_cmpl->sob_val);

		/*
		 * A signal CS can get completion while the corresponding wait
		 * for signal CS is on its way to the PQ. The wait for signal CS
		 * will get stuck if the signal CS incremented the SOB to its
		 * max value and there are no pending (submitted) waits on this
		 * SOB.
		 * We do the following to avoid this situation:
		 * 1. The wait for signal CS must get a ref for the signal CS as
		 *    soon as possible in cs_ioctl_signal_wait() and put it
		 *    before being submitted to the PQ but after it incremented
		 *    the SOB refcnt in init_signal_wait_cs().
		 * 2. Signal/Wait for signal CS will decrement the SOB refcnt
		 *    here.
		 * These two measures guarantee that the wait for signal CS will
		 * reset the SOB upon completion rather than the signal CS and
		 * hence the above scenario is avoided.
		 */
		kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
	}

free:
	kfree(hl_cs_cmpl);
}

void hl_fence_put(struct hl_fence *fence)
{
	if (fence)
		kref_put(&fence->refcount, hl_fence_release);
}

void hl_fence_get(struct hl_fence *fence)
{
	if (fence)
		kref_get(&fence->refcount);
}

static void hl_fence_init(struct hl_fence *fence)
{
	kref_init(&fence->refcount);
	fence->error = 0;
	init_completion(&fence->completion);
}

static void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job		: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;
			job->contains_dma_pkt = parser.contains_dma_pkt;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
}

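/*
 * free_job() - release a single job that belongs to a CS.
 *
 * Drops the references the job holds on its patched/user CBs, removes the
 * job from the CS job list and, for jobs on external or H/W queues, puts
 * the CS reference that was taken for the job's completion.
 */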
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from a rollback in which the patched
		 * CB was never created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 */
	if (job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb && hdev->mmu_enable) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	kfree(job);
}

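/*
 * cs_counters_aggregate() - fold the per-context CS drop counters into the
 * device-wide aggregated counters.
 */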
static void cs_counters_aggregate(struct hl_device *hdev, struct hl_ctx *ctx)
{
	hdev->aggregated_cs_counters.device_in_reset_drop_cnt +=
			ctx->cs_counters.device_in_reset_drop_cnt;
	hdev->aggregated_cs_counters.out_of_mem_drop_cnt +=
			ctx->cs_counters.out_of_mem_drop_cnt;
	hdev->aggregated_cs_counters.parsing_drop_cnt +=
			ctx->cs_counters.parsing_drop_cnt;
	hdev->aggregated_cs_counters.queue_full_drop_cnt +=
			ctx->cs_counters.queue_full_drop_cnt;
	hdev->aggregated_cs_counters.max_cs_in_flight_drop_cnt +=
			ctx->cs_counters.max_cs_in_flight_drop_cnt;
}

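/*
 * cs_do_release() - final release of a CS, invoked when its refcount drops
 * to zero.
 *
 * Frees any remaining (internal queue) jobs, updates the CI of internal
 * queues, removes the CS from the H/W queues mirror list, re-arms the TDR
 * for the next pending CS, marks an appropriate error on the fence of a
 * timed-out/aborted/never-submitted CS, and finally completes the fence
 * and frees the CS object.
 */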
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs,
						refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although reaching here means that all external jobs have finished
	 * (because each one of them took a refcnt to the CS), we still need
	 * to go over the internal jobs and free them. Otherwise, we will
	 * have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		hdev->asic_funcs->hw_queues_lock(hdev);

		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

		spin_lock(&hdev->hw_queues_mirror_lock);
		/* remove CS from hw_queues mirror list */
		list_del_init(&cs->mirror_node);
		spin_unlock(&hdev->hw_queues_mirror_lock);

		/*
		 * Don't cancel the TDR in case this CS was timed out because
		 * we might be running from the TDR context
		 */
		if ((!cs->timedout) &&
			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
			struct hl_cs *next;

			if (cs->tdr_active)
				cancel_delayed_work_sync(&cs->work_tdr);

			spin_lock(&hdev->hw_queues_mirror_lock);

			/* queue TDR for next CS */
			next = list_first_entry_or_null(
					&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node);

			if ((next) && (!next->tdr_active)) {
				next->tdr_active = true;
				schedule_delayed_work(&next->work_tdr,
							hdev->timeout_jiffies);
			}

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	} else if (cs->type == CS_TYPE_WAIT) {
		/*
		 * In case the wait for signal CS was submitted, the put occurs
		 * in init_signal_wait_cs() right before hanging on the PQ.
		 */
		hl_fence_put(cs->signal_fence);
	}

	/*
	 * Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	/* We need to mark an error for not submitted because in that case
	 * the hl fence release flow is different. Mainly, we don't need
	 * to handle hw_sob for signal/wait
	 */
	if (cs->timedout)
		cs->fence->error = -ETIMEDOUT;
	else if (cs->aborted)
		cs->fence->error = -EIO;
	else if (!cs->submitted)
		cs->fence->error = -EBUSY;

	complete_all(&cs->fence->completion);
	hl_fence_put(cs->fence);
	cs_counters_aggregate(hdev, cs->ctx);

	kfree(cs->jobs_in_queue_cnt);
	kfree(cs);
}

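/*
 * cs_timedout() - delayed TDR work attached to each CS.
 *
 * If the CS is still in flight when the timer fires, mark it as timed out,
 * report it and, if reset_on_lockup is set, trigger a device reset.
 */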
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS as timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;

	dev_err(hdev->dev,
		"Command submission %llu has not finished in time!\n",
		cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
}

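/*
 * allocate_cs() - allocate a new CS object together with its completion
 * fence, and reserve a slot for it in the context's cs_pending ring.
 *
 * The slot is selected by masking the CS sequence with
 * (max_pending_cs - 1), which implies max_pending_cs is expected to be a
 * power of two. If the fence currently occupying that slot has not
 * completed yet, the new CS is rejected with -EAGAIN (too many CSs in
 * flight).
 */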
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			enum hl_cs_type cs_type, struct hl_cs **cs_new)
{
	struct hl_cs_compl *cs_cmpl;
	struct hl_fence *other = NULL;
	struct hl_cs *cs;
	int rc;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	cs->type = cs_type;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
	if (!cs_cmpl) {
		rc = -ENOMEM;
		goto free_cs;
	}

	cs_cmpl->hdev = hdev;
	cs_cmpl->type = cs->type;
	spin_lock_init(&cs_cmpl->lock);
	cs->fence = &cs_cmpl->base_fence;

	spin_lock(&ctx->cs_lock);

	cs_cmpl->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[cs_cmpl->cs_seq &
				(hdev->asic_prop.max_pending_cs - 1)];

	if (other && !completion_done(&other->completion)) {
		dev_dbg_ratelimited(hdev->dev,
			"Rejecting CS because of too many in-flight CSs\n");
		ctx->cs_counters.max_cs_in_flight_drop_cnt++;
		rc = -EAGAIN;
		goto free_fence;
	}

	cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues,
			sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC);
	if (!cs->jobs_in_queue_cnt) {
		rc = -ENOMEM;
		goto free_fence;
	}

	/* init hl_fence */
	hl_fence_init(&cs_cmpl->base_fence);

	cs->sequence = cs_cmpl->cs_seq;

	ctx->cs_pending[cs_cmpl->cs_seq &
			(hdev->asic_prop.max_pending_cs - 1)] =
							&cs_cmpl->base_fence;
	ctx->cs_sequence++;

	hl_fence_get(&cs_cmpl->base_fence);

	hl_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	spin_unlock(&ctx->cs_lock);
	kfree(cs_cmpl);
free_cs:
	kfree(cs);
	return rc;
}

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);
}

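/*
 * hl_cs_rollback_all() - after flushing all completion workqueues, go over
 * every CS that is still on the H/W queues mirror list, mark it as aborted
 * and roll back (free) its jobs.
 */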
void hl_cs_rollback_all(struct hl_device *hdev)
{
	int i;
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		flush_workqueue(hdev->cq_wq[i]);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
				mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}

static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	free_job(hdev, job);
}

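/*
 * validate_queue_index() - make sure the queue index provided by the user
 * refers to a real, user-accessible queue, and report back the queue type
 * and whether it requires a kernel-allocated CB.
 */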
static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	/* This must be checked here to prevent out-of-bounds access to
	 * hw_queues_props array
	 */
	if (chunk->queue_index >= asic->max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if (hw_queue_prop->type == QUEUE_TYPE_NA) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	*queue_type = hw_queue_prop->type;
	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

	return 0;
}

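/*
 * get_cb_from_cs_chunk() - resolve the user-supplied CB handle (passed
 * shifted left by PAGE_SHIFT) into a hl_cb object, validate the requested
 * size against the CB size and take a CS reference (cs_cnt) on it.
 */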
static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}

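/*
 * cs_ioctl_default() - handle a regular (execute/restore) CS submission.
 *
 * Copies the chunk array from user space, allocates a CS object and then,
 * for each chunk: validates the queue, resolves the CB, allocates a job and
 * runs the parser. A CS that contains only internal-queue jobs is rejected,
 * since completion is reported only for external/H/W queue jobs. Finally,
 * the CS is handed to hl_hw_queue_schedule_cs().
 */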
static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
				u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
						&is_kernel_allocated_cb);
		if (rc) {
			hpriv->ctx->cs_counters.parsing_drop_cnt++;
			goto free_cs_object;
		}

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				hpriv->ctx->cs_counters.parsing_drop_cnt++;
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			hpriv->ctx->cs_counters.out_of_mem_drop_cnt++;
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;

			goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment the CS reference. When the CS reference count
		 * reaches 0, the CS is done and can be signaled to the user
		 * and all its resources freed. Only increment for a JOB on
		 * external or H/W queues, because only for those JOBs we get
		 * completion
		 */
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			hpriv->ctx->cs_counters.parsing_drop_cnt++;
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (int_queues_only) {
		hpriv->ctx->cs_counters.parsing_drop_cnt++;
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

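/*
 * cs_ioctl_signal_wait() - handle a sync-stream (signal or wait) CS.
 *
 * For a wait CS, the single signal sequence number is copied from user
 * space and its fence is looked up; if the signal CS has already completed,
 * the ioctl returns immediately. Otherwise a single-job CS is built around
 * a kernel-allocated CB (sized by the ASIC's signal/wait CB size callbacks)
 * and scheduled on the requested sync-stream capable queue.
 */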
static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
				void __user *chunks, u32 num_chunks,
				u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_ctx *ctx = hpriv->ctx;
	struct hl_cs_chunk *cs_chunk_array, *chunk;
	struct hw_queue_properties *hw_queue_prop;
	struct hl_fence *sig_fence = NULL;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	enum hl_queue_type q_type;
	u64 *signal_seq_arr = NULL, signal_seq;
	u32 size_to_copy, q_idx, signal_seq_arr_len, cb_size;
	int rc;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* currently it is guaranteed to have only one chunk */
	chunk = &cs_chunk_array[0];

	if (chunk->queue_index >= hdev->asic_prop.max_queues) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	q_idx = chunk->queue_index;
	hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
	q_type = hw_queue_prop->type;

	if ((q_idx >= hdev->asic_prop.max_queues) ||
			(!hw_queue_prop->supports_sync_stream)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n", q_idx);
		rc = -EINVAL;
		goto free_cs_chunk_array;
	}

	if (cs_type == CS_TYPE_WAIT) {
		struct hl_cs_compl *sig_waitcs_cmpl;

		signal_seq_arr_len = chunk->num_signal_seq_arr;

		/* currently only one signal seq is supported */
		if (signal_seq_arr_len != 1) {
			dev_err(hdev->dev,
				"Wait for signal CS supports only one signal CS seq\n");
			rc = -EINVAL;
			goto free_cs_chunk_array;
		}

		signal_seq_arr = kmalloc_array(signal_seq_arr_len,
					sizeof(*signal_seq_arr),
					GFP_ATOMIC);
		if (!signal_seq_arr) {
			rc = -ENOMEM;
			goto free_cs_chunk_array;
		}

		size_to_copy = chunk->num_signal_seq_arr *
				sizeof(*signal_seq_arr);
		if (copy_from_user(signal_seq_arr,
					u64_to_user_ptr(chunk->signal_seq_arr),
					size_to_copy)) {
			dev_err(hdev->dev,
				"Failed to copy signal seq array from user\n");
			rc = -EFAULT;
			goto free_signal_seq_array;
		}

		/* currently it is guaranteed to have only one signal seq */
		signal_seq = signal_seq_arr[0];
		sig_fence = hl_ctx_get_fence(ctx, signal_seq);
		if (IS_ERR(sig_fence)) {
			dev_err(hdev->dev,
				"Failed to get signal CS with seq 0x%llx\n",
				signal_seq);
			rc = PTR_ERR(sig_fence);
			goto free_signal_seq_array;
		}

		if (!sig_fence) {
			/* signal CS already finished */
			rc = 0;
			goto free_signal_seq_array;
		}

		sig_waitcs_cmpl =
			container_of(sig_fence, struct hl_cs_compl, base_fence);

		if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
			dev_err(hdev->dev,
				"CS seq 0x%llx is not of a signal CS\n",
				signal_seq);
			hl_fence_put(sig_fence);
			rc = -EINVAL;
			goto free_signal_seq_array;
		}

		if (completion_done(&sig_fence->completion)) {
			/* signal CS already finished */
			hl_fence_put(sig_fence);
			rc = 0;
			goto free_signal_seq_array;
		}
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, ctx);

	rc = allocate_cs(hdev, ctx, cs_type, &cs);
	if (rc) {
		if (cs_type == CS_TYPE_WAIT)
			hl_fence_put(sig_fence);
		hl_ctx_put(ctx);
		goto free_signal_seq_array;
	}

	/*
	 * Save the signal CS fence for later initialization right before
	 * hanging the wait CS on the queue.
	 */
	if (cs->type == CS_TYPE_WAIT)
		cs->signal_fence = sig_fence;

	hl_debugfs_add_cs(cs);

	*cs_seq = cs->sequence;

	job = hl_cs_allocate_job(hdev, q_type, true);
	if (!job) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		dev_err(hdev->dev, "Failed to allocate a new job\n");
		rc = -ENOMEM;
		goto put_cs;
	}

	if (cs->type == CS_TYPE_WAIT)
		cb_size = hdev->asic_funcs->get_wait_cb_size(hdev);
	else
		cb_size = hdev->asic_funcs->get_signal_cb_size(hdev);

	cb = hl_cb_kernel_create(hdev, cb_size,
			q_type == QUEUE_TYPE_HW && hdev->mmu_enable);
	if (!cb) {
		ctx->cs_counters.out_of_mem_drop_cnt++;
		kfree(job);
		rc = -EFAULT;
		goto put_cs;
	}

	job->id = 0;
	job->cs = cs;
	job->user_cb = cb;
	job->user_cb->cs_cnt++;
	job->user_cb_size = cb_size;
	job->hw_queue_id = q_idx;

	/*
	 * No need in parsing, user CB is the patched CB.
	 * We call hl_cb_destroy() for two reasons: we don't need the CB in
	 * the CB idr anymore, and we need to decrement its refcount, which
	 * was incremented inside hl_cb_kernel_create().
	 */
	job->patched_cb = job->user_cb;
	job->job_cb_size = job->user_cb_size;
	hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);

	cs->jobs_in_queue_cnt[job->hw_queue_id]++;

	list_add_tail(&job->cs_node, &cs->job_list);

	/* increment refcount as for external queues we get completion */
	cs_get(cs);

	hl_debugfs_add_job(hdev, job);

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_signal_seq_array:
	if (cs_type == CS_TYPE_WAIT)
		kfree(signal_seq_arr);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}

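/*
 * hl_cs_ioctl() - the command-submission ioctl handler.
 *
 * Validates the CS flags, runs the context-switch/restore phase on the first
 * submission of a context (or when HL_CS_FLAGS_FORCE_RESTORE is set) and
 * waits for it to complete, then dispatches the execute chunks either to
 * cs_ioctl_default() or, for sync-stream CSs, to cs_ioctl_signal_wait().
 */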
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks_execute, *chunks_restore;
	enum hl_cs_type cs_type;
	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
	u64 cs_seq = ULLONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		rc = -EBUSY;
		goto out;
	}

	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;

	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
		dev_err(hdev->dev,
			"Signal and wait CS flags are mutually exclusive, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
			(!hdev->supports_sync_stream))) {
		dev_err(hdev->dev, "Sync stream CS is not supported\n");
		rc = -EINVAL;
		goto out;
	}

	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
		cs_type = CS_TYPE_SIGNAL;
	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
		cs_type = CS_TYPE_WAIT;
	else
		cs_type = CS_TYPE_DEFAULT;

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (cs_type == CS_TYPE_DEFAULT) {
		if (!num_chunks_execute) {
			dev_err(hdev->dev,
				"Got execute CS with 0 chunks, context %d\n",
				ctx->asid);
			rc = -EINVAL;
			goto out;
		}
	} else if (num_chunks_execute != 1) {
		dev_err(hdev->dev,
			"Sync stream CS mandates one chunk only, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks_restore =
			(void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks_restore = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timed out, or if the device is not
				 * IDLE while we want to do a context switch
				 * (-EBUSY), we need to soft-reset because
				 * QMAN is probably stuck. However, we can't
				 * call reset here directly because of a
				 * deadlock, so we need to do it at the very
				 * end of this function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (!num_chunks_restore) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = cs_ioctl_default(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks_restore) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
			if (ret <= 0) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

	if (cs_type == CS_TYPE_DEFAULT)
		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
					&cs_seq);
	else
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
						num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}

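/*
 * _hl_cs_wait_ioctl() - wait for a given CS sequence to complete.
 *
 * Returns a negative error code on failure, 0 if the CS has not completed
 * within the given timeout (reported to the user as busy), and a positive
 * value if the CS has completed or its fence has already been released.
 */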
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct hl_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
		if (rc == -EINVAL)
			dev_notice_ratelimited(hdev->dev,
				"Can't wait on CS %llu because current CS is at seq %llu\n",
				seq, ctx->cs_sequence);
	} else if (fence) {
		if (!timeout_us)
			rc = completion_done(&fence->completion);
		else
			rc = wait_for_completion_interruptible_timeout(
					&fence->completion, timeout);

		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;

		hl_fence_put(fence);
	} else {
		dev_dbg(hdev->dev,
			"Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
			seq, ctx->cs_sequence);
		rc = 1;
	}

	hl_ctx_put(ctx);

	return rc;
}

int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		if (rc == -ERESTARTSYS) {
			dev_err_ratelimited(hdev->dev,
				"user process got signal while waiting for CS handle %llu\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has timed-out while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			dev_err_ratelimited(hdev->dev,
				"CS %llu has been aborted while user process is waiting for it\n",
				seq);
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}