1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
3*4882a593Smuzhiyun /* Copyright 2019 Collabora ltd. */
4*4882a593Smuzhiyun #include <linux/delay.h>
5*4882a593Smuzhiyun #include <linux/interrupt.h>
6*4882a593Smuzhiyun #include <linux/io.h>
7*4882a593Smuzhiyun #include <linux/platform_device.h>
8*4882a593Smuzhiyun #include <linux/pm_runtime.h>
9*4882a593Smuzhiyun #include <linux/dma-resv.h>
10*4882a593Smuzhiyun #include <drm/gpu_scheduler.h>
11*4882a593Smuzhiyun #include <drm/panfrost_drm.h>
12*4882a593Smuzhiyun
13*4882a593Smuzhiyun #include "panfrost_device.h"
14*4882a593Smuzhiyun #include "panfrost_devfreq.h"
15*4882a593Smuzhiyun #include "panfrost_job.h"
16*4882a593Smuzhiyun #include "panfrost_features.h"
17*4882a593Smuzhiyun #include "panfrost_issues.h"
18*4882a593Smuzhiyun #include "panfrost_gem.h"
19*4882a593Smuzhiyun #include "panfrost_regs.h"
20*4882a593Smuzhiyun #include "panfrost_gpu.h"
21*4882a593Smuzhiyun #include "panfrost_mmu.h"
22*4882a593Smuzhiyun
/* Timeout, in milliseconds, handed to each job slot's DRM scheduler. */
#define JOB_TIMEOUT_MS 500

/*
 * MMIO accessors relative to the device register mapping. Every macro
 * argument is parenthesised so that arbitrary expressions can be passed
 * without precedence surprises.
 */
#define job_write(dev, reg, data) writel((data), (dev)->iomem + (reg))
#define job_read(dev, reg) readl((dev)->iomem + (reg))
27*4882a593Smuzhiyun
/* State of a job slot's scheduler, stored in panfrost_queue_state.status. */
enum panfrost_queue_status {
	/* Scheduler is running; job faults are reported immediately. */
	PANFROST_QUEUE_STATUS_ACTIVE = 0,
	/* Set by panfrost_scheduler_stop(). */
	PANFROST_QUEUE_STATUS_STOPPED,
	/* Set by panfrost_scheduler_start() while the queue is restarted. */
	PANFROST_QUEUE_STATUS_STARTING,
	/* A fault was seen while STARTING; it is reported once restarted. */
	PANFROST_QUEUE_STATUS_FAULT_PENDING,
};
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun struct panfrost_queue_state {
36*4882a593Smuzhiyun struct drm_gpu_scheduler sched;
37*4882a593Smuzhiyun atomic_t status;
38*4882a593Smuzhiyun struct mutex lock;
39*4882a593Smuzhiyun u64 fence_context;
40*4882a593Smuzhiyun u64 emit_seqno;
41*4882a593Smuzhiyun };
42*4882a593Smuzhiyun
43*4882a593Smuzhiyun struct panfrost_job_slot {
44*4882a593Smuzhiyun struct panfrost_queue_state queue[NUM_JOB_SLOTS];
45*4882a593Smuzhiyun spinlock_t job_lock;
46*4882a593Smuzhiyun };
47*4882a593Smuzhiyun
48*4882a593Smuzhiyun static struct panfrost_job *
to_panfrost_job(struct drm_sched_job * sched_job)49*4882a593Smuzhiyun to_panfrost_job(struct drm_sched_job *sched_job)
50*4882a593Smuzhiyun {
51*4882a593Smuzhiyun return container_of(sched_job, struct panfrost_job, base);
52*4882a593Smuzhiyun }
53*4882a593Smuzhiyun
54*4882a593Smuzhiyun struct panfrost_fence {
55*4882a593Smuzhiyun struct dma_fence base;
56*4882a593Smuzhiyun struct drm_device *dev;
57*4882a593Smuzhiyun /* panfrost seqno for signaled() test */
58*4882a593Smuzhiyun u64 seqno;
59*4882a593Smuzhiyun int queue;
60*4882a593Smuzhiyun };
61*4882a593Smuzhiyun
62*4882a593Smuzhiyun static inline struct panfrost_fence *
to_panfrost_fence(struct dma_fence * fence)63*4882a593Smuzhiyun to_panfrost_fence(struct dma_fence *fence)
64*4882a593Smuzhiyun {
65*4882a593Smuzhiyun return (struct panfrost_fence *)fence;
66*4882a593Smuzhiyun }
67*4882a593Smuzhiyun
/* dma_fence_ops.get_driver_name: the name is the same for every fence. */
static const char *panfrost_fence_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "panfrost";

	return driver_name;
}
72*4882a593Smuzhiyun
panfrost_fence_get_timeline_name(struct dma_fence * fence)73*4882a593Smuzhiyun static const char *panfrost_fence_get_timeline_name(struct dma_fence *fence)
74*4882a593Smuzhiyun {
75*4882a593Smuzhiyun struct panfrost_fence *f = to_panfrost_fence(fence);
76*4882a593Smuzhiyun
77*4882a593Smuzhiyun switch (f->queue) {
78*4882a593Smuzhiyun case 0:
79*4882a593Smuzhiyun return "panfrost-js-0";
80*4882a593Smuzhiyun case 1:
81*4882a593Smuzhiyun return "panfrost-js-1";
82*4882a593Smuzhiyun case 2:
83*4882a593Smuzhiyun return "panfrost-js-2";
84*4882a593Smuzhiyun default:
85*4882a593Smuzhiyun return NULL;
86*4882a593Smuzhiyun }
87*4882a593Smuzhiyun }
88*4882a593Smuzhiyun
89*4882a593Smuzhiyun static const struct dma_fence_ops panfrost_fence_ops = {
90*4882a593Smuzhiyun .get_driver_name = panfrost_fence_get_driver_name,
91*4882a593Smuzhiyun .get_timeline_name = panfrost_fence_get_timeline_name,
92*4882a593Smuzhiyun };
93*4882a593Smuzhiyun
panfrost_fence_create(struct panfrost_device * pfdev,int js_num)94*4882a593Smuzhiyun static struct dma_fence *panfrost_fence_create(struct panfrost_device *pfdev, int js_num)
95*4882a593Smuzhiyun {
96*4882a593Smuzhiyun struct panfrost_fence *fence;
97*4882a593Smuzhiyun struct panfrost_job_slot *js = pfdev->js;
98*4882a593Smuzhiyun
99*4882a593Smuzhiyun fence = kzalloc(sizeof(*fence), GFP_KERNEL);
100*4882a593Smuzhiyun if (!fence)
101*4882a593Smuzhiyun return ERR_PTR(-ENOMEM);
102*4882a593Smuzhiyun
103*4882a593Smuzhiyun fence->dev = pfdev->ddev;
104*4882a593Smuzhiyun fence->queue = js_num;
105*4882a593Smuzhiyun fence->seqno = ++js->queue[js_num].emit_seqno;
106*4882a593Smuzhiyun dma_fence_init(&fence->base, &panfrost_fence_ops, &js->job_lock,
107*4882a593Smuzhiyun js->queue[js_num].fence_context, fence->seqno);
108*4882a593Smuzhiyun
109*4882a593Smuzhiyun return &fence->base;
110*4882a593Smuzhiyun }
111*4882a593Smuzhiyun
panfrost_job_get_slot(struct panfrost_job * job)112*4882a593Smuzhiyun static int panfrost_job_get_slot(struct panfrost_job *job)
113*4882a593Smuzhiyun {
114*4882a593Smuzhiyun /* JS0: fragment jobs.
115*4882a593Smuzhiyun * JS1: vertex/tiler jobs
116*4882a593Smuzhiyun * JS2: compute jobs
117*4882a593Smuzhiyun */
118*4882a593Smuzhiyun if (job->requirements & PANFROST_JD_REQ_FS)
119*4882a593Smuzhiyun return 0;
120*4882a593Smuzhiyun
121*4882a593Smuzhiyun /* Not exposed to userspace yet */
122*4882a593Smuzhiyun #if 0
123*4882a593Smuzhiyun if (job->requirements & PANFROST_JD_REQ_ONLY_COMPUTE) {
124*4882a593Smuzhiyun if ((job->requirements & PANFROST_JD_REQ_CORE_GRP_MASK) &&
125*4882a593Smuzhiyun (job->pfdev->features.nr_core_groups == 2))
126*4882a593Smuzhiyun return 2;
127*4882a593Smuzhiyun if (panfrost_has_hw_issue(job->pfdev, HW_ISSUE_8987))
128*4882a593Smuzhiyun return 2;
129*4882a593Smuzhiyun }
130*4882a593Smuzhiyun #endif
131*4882a593Smuzhiyun return 1;
132*4882a593Smuzhiyun }
133*4882a593Smuzhiyun
/*
 * Program the core affinity for the next job on slot @js.
 *
 * @requirements is currently unused: every present shader core is
 * selected. Tiler-only jobs and hardware with two coherent core groups
 * may need something smarter eventually.
 */
static void panfrost_job_write_affinity(struct panfrost_device *pfdev,
					u32 requirements,
					int js)
{
	const u64 core_mask = pfdev->features.shader_present;
	const u32 core_mask_lo = core_mask & 0xFFFFFFFF;
	const u32 core_mask_hi = core_mask >> 32;

	job_write(pfdev, JS_AFFINITY_NEXT_LO(js), core_mask_lo);
	job_write(pfdev, JS_AFFINITY_NEXT_HI(js), core_mask_hi);
}
150*4882a593Smuzhiyun
panfrost_job_hw_submit(struct panfrost_job * job,int js)151*4882a593Smuzhiyun static void panfrost_job_hw_submit(struct panfrost_job *job, int js)
152*4882a593Smuzhiyun {
153*4882a593Smuzhiyun struct panfrost_device *pfdev = job->pfdev;
154*4882a593Smuzhiyun u32 cfg;
155*4882a593Smuzhiyun u64 jc_head = job->jc;
156*4882a593Smuzhiyun int ret;
157*4882a593Smuzhiyun
158*4882a593Smuzhiyun panfrost_devfreq_record_busy(&pfdev->pfdevfreq);
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun ret = pm_runtime_get_sync(pfdev->dev);
161*4882a593Smuzhiyun if (ret < 0)
162*4882a593Smuzhiyun return;
163*4882a593Smuzhiyun
164*4882a593Smuzhiyun if (WARN_ON(job_read(pfdev, JS_COMMAND_NEXT(js)))) {
165*4882a593Smuzhiyun return;
166*4882a593Smuzhiyun }
167*4882a593Smuzhiyun
168*4882a593Smuzhiyun cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu);
169*4882a593Smuzhiyun
170*4882a593Smuzhiyun job_write(pfdev, JS_HEAD_NEXT_LO(js), jc_head & 0xFFFFFFFF);
171*4882a593Smuzhiyun job_write(pfdev, JS_HEAD_NEXT_HI(js), jc_head >> 32);
172*4882a593Smuzhiyun
173*4882a593Smuzhiyun panfrost_job_write_affinity(pfdev, job->requirements, js);
174*4882a593Smuzhiyun
175*4882a593Smuzhiyun /* start MMU, medium priority, cache clean/flush on end, clean/flush on
176*4882a593Smuzhiyun * start */
177*4882a593Smuzhiyun cfg |= JS_CONFIG_THREAD_PRI(8) |
178*4882a593Smuzhiyun JS_CONFIG_START_FLUSH_CLEAN_INVALIDATE |
179*4882a593Smuzhiyun JS_CONFIG_END_FLUSH_CLEAN_INVALIDATE;
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
182*4882a593Smuzhiyun cfg |= JS_CONFIG_ENABLE_FLUSH_REDUCTION;
183*4882a593Smuzhiyun
184*4882a593Smuzhiyun if (panfrost_has_hw_issue(pfdev, HW_ISSUE_10649))
185*4882a593Smuzhiyun cfg |= JS_CONFIG_START_MMU;
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun job_write(pfdev, JS_CONFIG_NEXT(js), cfg);
188*4882a593Smuzhiyun
189*4882a593Smuzhiyun if (panfrost_has_hw_feature(pfdev, HW_FEATURE_FLUSH_REDUCTION))
190*4882a593Smuzhiyun job_write(pfdev, JS_FLUSH_ID_NEXT(js), job->flush_id);
191*4882a593Smuzhiyun
192*4882a593Smuzhiyun /* GO ! */
193*4882a593Smuzhiyun dev_dbg(pfdev->dev, "JS: Submitting atom %p to js[%d] with head=0x%llx",
194*4882a593Smuzhiyun job, js, jc_head);
195*4882a593Smuzhiyun
196*4882a593Smuzhiyun job_write(pfdev, JS_COMMAND_NEXT(js), JS_COMMAND_START);
197*4882a593Smuzhiyun }
198*4882a593Smuzhiyun
panfrost_acquire_object_fences(struct drm_gem_object ** bos,int bo_count,struct dma_fence ** implicit_fences)199*4882a593Smuzhiyun static void panfrost_acquire_object_fences(struct drm_gem_object **bos,
200*4882a593Smuzhiyun int bo_count,
201*4882a593Smuzhiyun struct dma_fence **implicit_fences)
202*4882a593Smuzhiyun {
203*4882a593Smuzhiyun int i;
204*4882a593Smuzhiyun
205*4882a593Smuzhiyun for (i = 0; i < bo_count; i++)
206*4882a593Smuzhiyun implicit_fences[i] = dma_resv_get_excl_rcu(bos[i]->resv);
207*4882a593Smuzhiyun }
208*4882a593Smuzhiyun
panfrost_attach_object_fences(struct drm_gem_object ** bos,int bo_count,struct dma_fence * fence)209*4882a593Smuzhiyun static void panfrost_attach_object_fences(struct drm_gem_object **bos,
210*4882a593Smuzhiyun int bo_count,
211*4882a593Smuzhiyun struct dma_fence *fence)
212*4882a593Smuzhiyun {
213*4882a593Smuzhiyun int i;
214*4882a593Smuzhiyun
215*4882a593Smuzhiyun for (i = 0; i < bo_count; i++)
216*4882a593Smuzhiyun dma_resv_add_excl_fence(bos[i]->resv, fence);
217*4882a593Smuzhiyun }
218*4882a593Smuzhiyun
panfrost_job_push(struct panfrost_job * job)219*4882a593Smuzhiyun int panfrost_job_push(struct panfrost_job *job)
220*4882a593Smuzhiyun {
221*4882a593Smuzhiyun struct panfrost_device *pfdev = job->pfdev;
222*4882a593Smuzhiyun int slot = panfrost_job_get_slot(job);
223*4882a593Smuzhiyun struct drm_sched_entity *entity = &job->file_priv->sched_entity[slot];
224*4882a593Smuzhiyun struct ww_acquire_ctx acquire_ctx;
225*4882a593Smuzhiyun int ret = 0;
226*4882a593Smuzhiyun
227*4882a593Smuzhiyun mutex_lock(&pfdev->sched_lock);
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun ret = drm_gem_lock_reservations(job->bos, job->bo_count,
230*4882a593Smuzhiyun &acquire_ctx);
231*4882a593Smuzhiyun if (ret) {
232*4882a593Smuzhiyun mutex_unlock(&pfdev->sched_lock);
233*4882a593Smuzhiyun return ret;
234*4882a593Smuzhiyun }
235*4882a593Smuzhiyun
236*4882a593Smuzhiyun ret = drm_sched_job_init(&job->base, entity, NULL);
237*4882a593Smuzhiyun if (ret) {
238*4882a593Smuzhiyun mutex_unlock(&pfdev->sched_lock);
239*4882a593Smuzhiyun goto unlock;
240*4882a593Smuzhiyun }
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun job->render_done_fence = dma_fence_get(&job->base.s_fence->finished);
243*4882a593Smuzhiyun
244*4882a593Smuzhiyun kref_get(&job->refcount); /* put by scheduler job completion */
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun panfrost_acquire_object_fences(job->bos, job->bo_count,
247*4882a593Smuzhiyun job->implicit_fences);
248*4882a593Smuzhiyun
249*4882a593Smuzhiyun drm_sched_entity_push_job(&job->base, entity);
250*4882a593Smuzhiyun
251*4882a593Smuzhiyun mutex_unlock(&pfdev->sched_lock);
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun panfrost_attach_object_fences(job->bos, job->bo_count,
254*4882a593Smuzhiyun job->render_done_fence);
255*4882a593Smuzhiyun
256*4882a593Smuzhiyun unlock:
257*4882a593Smuzhiyun drm_gem_unlock_reservations(job->bos, job->bo_count, &acquire_ctx);
258*4882a593Smuzhiyun
259*4882a593Smuzhiyun return ret;
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun
panfrost_job_cleanup(struct kref * ref)262*4882a593Smuzhiyun static void panfrost_job_cleanup(struct kref *ref)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun struct panfrost_job *job = container_of(ref, struct panfrost_job,
265*4882a593Smuzhiyun refcount);
266*4882a593Smuzhiyun unsigned int i;
267*4882a593Smuzhiyun
268*4882a593Smuzhiyun if (job->in_fences) {
269*4882a593Smuzhiyun for (i = 0; i < job->in_fence_count; i++)
270*4882a593Smuzhiyun dma_fence_put(job->in_fences[i]);
271*4882a593Smuzhiyun kvfree(job->in_fences);
272*4882a593Smuzhiyun }
273*4882a593Smuzhiyun if (job->implicit_fences) {
274*4882a593Smuzhiyun for (i = 0; i < job->bo_count; i++)
275*4882a593Smuzhiyun dma_fence_put(job->implicit_fences[i]);
276*4882a593Smuzhiyun kvfree(job->implicit_fences);
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun dma_fence_put(job->done_fence);
279*4882a593Smuzhiyun dma_fence_put(job->render_done_fence);
280*4882a593Smuzhiyun
281*4882a593Smuzhiyun if (job->mappings) {
282*4882a593Smuzhiyun for (i = 0; i < job->bo_count; i++) {
283*4882a593Smuzhiyun if (!job->mappings[i])
284*4882a593Smuzhiyun break;
285*4882a593Smuzhiyun
286*4882a593Smuzhiyun atomic_dec(&job->mappings[i]->obj->gpu_usecount);
287*4882a593Smuzhiyun panfrost_gem_mapping_put(job->mappings[i]);
288*4882a593Smuzhiyun }
289*4882a593Smuzhiyun kvfree(job->mappings);
290*4882a593Smuzhiyun }
291*4882a593Smuzhiyun
292*4882a593Smuzhiyun if (job->bos) {
293*4882a593Smuzhiyun for (i = 0; i < job->bo_count; i++)
294*4882a593Smuzhiyun drm_gem_object_put(job->bos[i]);
295*4882a593Smuzhiyun
296*4882a593Smuzhiyun kvfree(job->bos);
297*4882a593Smuzhiyun }
298*4882a593Smuzhiyun
299*4882a593Smuzhiyun kfree(job);
300*4882a593Smuzhiyun }
301*4882a593Smuzhiyun
panfrost_job_put(struct panfrost_job * job)302*4882a593Smuzhiyun void panfrost_job_put(struct panfrost_job *job)
303*4882a593Smuzhiyun {
304*4882a593Smuzhiyun kref_put(&job->refcount, panfrost_job_cleanup);
305*4882a593Smuzhiyun }
306*4882a593Smuzhiyun
/*
 * Scheduler free_job hook: clean up the scheduler side of the job, then
 * drop one reference on the panfrost job.
 */
static void panfrost_job_free(struct drm_sched_job *sched_job)
{
	drm_sched_job_cleanup(sched_job);

	panfrost_job_put(to_panfrost_job(sched_job));
}
315*4882a593Smuzhiyun
panfrost_job_dependency(struct drm_sched_job * sched_job,struct drm_sched_entity * s_entity)316*4882a593Smuzhiyun static struct dma_fence *panfrost_job_dependency(struct drm_sched_job *sched_job,
317*4882a593Smuzhiyun struct drm_sched_entity *s_entity)
318*4882a593Smuzhiyun {
319*4882a593Smuzhiyun struct panfrost_job *job = to_panfrost_job(sched_job);
320*4882a593Smuzhiyun struct dma_fence *fence;
321*4882a593Smuzhiyun unsigned int i;
322*4882a593Smuzhiyun
323*4882a593Smuzhiyun /* Explicit fences */
324*4882a593Smuzhiyun for (i = 0; i < job->in_fence_count; i++) {
325*4882a593Smuzhiyun if (job->in_fences[i]) {
326*4882a593Smuzhiyun fence = job->in_fences[i];
327*4882a593Smuzhiyun job->in_fences[i] = NULL;
328*4882a593Smuzhiyun return fence;
329*4882a593Smuzhiyun }
330*4882a593Smuzhiyun }
331*4882a593Smuzhiyun
332*4882a593Smuzhiyun /* Implicit fences, max. one per BO */
333*4882a593Smuzhiyun for (i = 0; i < job->bo_count; i++) {
334*4882a593Smuzhiyun if (job->implicit_fences[i]) {
335*4882a593Smuzhiyun fence = job->implicit_fences[i];
336*4882a593Smuzhiyun job->implicit_fences[i] = NULL;
337*4882a593Smuzhiyun return fence;
338*4882a593Smuzhiyun }
339*4882a593Smuzhiyun }
340*4882a593Smuzhiyun
341*4882a593Smuzhiyun return NULL;
342*4882a593Smuzhiyun }
343*4882a593Smuzhiyun
panfrost_job_run(struct drm_sched_job * sched_job)344*4882a593Smuzhiyun static struct dma_fence *panfrost_job_run(struct drm_sched_job *sched_job)
345*4882a593Smuzhiyun {
346*4882a593Smuzhiyun struct panfrost_job *job = to_panfrost_job(sched_job);
347*4882a593Smuzhiyun struct panfrost_device *pfdev = job->pfdev;
348*4882a593Smuzhiyun int slot = panfrost_job_get_slot(job);
349*4882a593Smuzhiyun struct dma_fence *fence = NULL;
350*4882a593Smuzhiyun
351*4882a593Smuzhiyun if (unlikely(job->base.s_fence->finished.error))
352*4882a593Smuzhiyun return NULL;
353*4882a593Smuzhiyun
354*4882a593Smuzhiyun pfdev->jobs[slot] = job;
355*4882a593Smuzhiyun
356*4882a593Smuzhiyun fence = panfrost_fence_create(pfdev, slot);
357*4882a593Smuzhiyun if (IS_ERR(fence))
358*4882a593Smuzhiyun return NULL;
359*4882a593Smuzhiyun
360*4882a593Smuzhiyun if (job->done_fence)
361*4882a593Smuzhiyun dma_fence_put(job->done_fence);
362*4882a593Smuzhiyun job->done_fence = dma_fence_get(fence);
363*4882a593Smuzhiyun
364*4882a593Smuzhiyun panfrost_job_hw_submit(job, slot);
365*4882a593Smuzhiyun
366*4882a593Smuzhiyun return fence;
367*4882a593Smuzhiyun }
368*4882a593Smuzhiyun
panfrost_job_enable_interrupts(struct panfrost_device * pfdev)369*4882a593Smuzhiyun void panfrost_job_enable_interrupts(struct panfrost_device *pfdev)
370*4882a593Smuzhiyun {
371*4882a593Smuzhiyun int j;
372*4882a593Smuzhiyun u32 irq_mask = 0;
373*4882a593Smuzhiyun
374*4882a593Smuzhiyun for (j = 0; j < NUM_JOB_SLOTS; j++) {
375*4882a593Smuzhiyun irq_mask |= MK_JS_MASK(j);
376*4882a593Smuzhiyun }
377*4882a593Smuzhiyun
378*4882a593Smuzhiyun job_write(pfdev, JOB_INT_CLEAR, irq_mask);
379*4882a593Smuzhiyun job_write(pfdev, JOB_INT_MASK, irq_mask);
380*4882a593Smuzhiyun }
381*4882a593Smuzhiyun
panfrost_scheduler_stop(struct panfrost_queue_state * queue,struct drm_sched_job * bad)382*4882a593Smuzhiyun static bool panfrost_scheduler_stop(struct panfrost_queue_state *queue,
383*4882a593Smuzhiyun struct drm_sched_job *bad)
384*4882a593Smuzhiyun {
385*4882a593Smuzhiyun enum panfrost_queue_status old_status;
386*4882a593Smuzhiyun bool stopped = false;
387*4882a593Smuzhiyun
388*4882a593Smuzhiyun mutex_lock(&queue->lock);
389*4882a593Smuzhiyun old_status = atomic_xchg(&queue->status,
390*4882a593Smuzhiyun PANFROST_QUEUE_STATUS_STOPPED);
391*4882a593Smuzhiyun if (old_status == PANFROST_QUEUE_STATUS_STOPPED)
392*4882a593Smuzhiyun goto out;
393*4882a593Smuzhiyun
394*4882a593Smuzhiyun WARN_ON(old_status != PANFROST_QUEUE_STATUS_ACTIVE);
395*4882a593Smuzhiyun drm_sched_stop(&queue->sched, bad);
396*4882a593Smuzhiyun if (bad)
397*4882a593Smuzhiyun drm_sched_increase_karma(bad);
398*4882a593Smuzhiyun
399*4882a593Smuzhiyun stopped = true;
400*4882a593Smuzhiyun
401*4882a593Smuzhiyun /*
402*4882a593Smuzhiyun * Set the timeout to max so the timer doesn't get started
403*4882a593Smuzhiyun * when we return from the timeout handler (restored in
404*4882a593Smuzhiyun * panfrost_scheduler_start()).
405*4882a593Smuzhiyun */
406*4882a593Smuzhiyun queue->sched.timeout = MAX_SCHEDULE_TIMEOUT;
407*4882a593Smuzhiyun
408*4882a593Smuzhiyun out:
409*4882a593Smuzhiyun mutex_unlock(&queue->lock);
410*4882a593Smuzhiyun
411*4882a593Smuzhiyun return stopped;
412*4882a593Smuzhiyun }
413*4882a593Smuzhiyun
panfrost_scheduler_start(struct panfrost_queue_state * queue)414*4882a593Smuzhiyun static void panfrost_scheduler_start(struct panfrost_queue_state *queue)
415*4882a593Smuzhiyun {
416*4882a593Smuzhiyun enum panfrost_queue_status old_status;
417*4882a593Smuzhiyun
418*4882a593Smuzhiyun mutex_lock(&queue->lock);
419*4882a593Smuzhiyun old_status = atomic_xchg(&queue->status,
420*4882a593Smuzhiyun PANFROST_QUEUE_STATUS_STARTING);
421*4882a593Smuzhiyun WARN_ON(old_status != PANFROST_QUEUE_STATUS_STOPPED);
422*4882a593Smuzhiyun
423*4882a593Smuzhiyun /* Restore the original timeout before starting the scheduler. */
424*4882a593Smuzhiyun queue->sched.timeout = msecs_to_jiffies(JOB_TIMEOUT_MS);
425*4882a593Smuzhiyun drm_sched_resubmit_jobs(&queue->sched);
426*4882a593Smuzhiyun drm_sched_start(&queue->sched, true);
427*4882a593Smuzhiyun old_status = atomic_xchg(&queue->status,
428*4882a593Smuzhiyun PANFROST_QUEUE_STATUS_ACTIVE);
429*4882a593Smuzhiyun if (old_status == PANFROST_QUEUE_STATUS_FAULT_PENDING)
430*4882a593Smuzhiyun drm_sched_fault(&queue->sched);
431*4882a593Smuzhiyun
432*4882a593Smuzhiyun mutex_unlock(&queue->lock);
433*4882a593Smuzhiyun }
434*4882a593Smuzhiyun
panfrost_job_timedout(struct drm_sched_job * sched_job)435*4882a593Smuzhiyun static void panfrost_job_timedout(struct drm_sched_job *sched_job)
436*4882a593Smuzhiyun {
437*4882a593Smuzhiyun struct panfrost_job *job = to_panfrost_job(sched_job);
438*4882a593Smuzhiyun struct panfrost_device *pfdev = job->pfdev;
439*4882a593Smuzhiyun int js = panfrost_job_get_slot(job);
440*4882a593Smuzhiyun
441*4882a593Smuzhiyun /*
442*4882a593Smuzhiyun * If the GPU managed to complete this jobs fence, the timeout is
443*4882a593Smuzhiyun * spurious. Bail out.
444*4882a593Smuzhiyun */
445*4882a593Smuzhiyun if (dma_fence_is_signaled(job->done_fence))
446*4882a593Smuzhiyun return;
447*4882a593Smuzhiyun
448*4882a593Smuzhiyun dev_err(pfdev->dev, "gpu sched timeout, js=%d, config=0x%x, status=0x%x, head=0x%x, tail=0x%x, sched_job=%p",
449*4882a593Smuzhiyun js,
450*4882a593Smuzhiyun job_read(pfdev, JS_CONFIG(js)),
451*4882a593Smuzhiyun job_read(pfdev, JS_STATUS(js)),
452*4882a593Smuzhiyun job_read(pfdev, JS_HEAD_LO(js)),
453*4882a593Smuzhiyun job_read(pfdev, JS_TAIL_LO(js)),
454*4882a593Smuzhiyun sched_job);
455*4882a593Smuzhiyun
456*4882a593Smuzhiyun /* Scheduler is already stopped, nothing to do. */
457*4882a593Smuzhiyun if (!panfrost_scheduler_stop(&pfdev->js->queue[js], sched_job))
458*4882a593Smuzhiyun return;
459*4882a593Smuzhiyun
460*4882a593Smuzhiyun /* Schedule a reset if there's no reset in progress. */
461*4882a593Smuzhiyun if (!atomic_xchg(&pfdev->reset.pending, 1))
462*4882a593Smuzhiyun schedule_work(&pfdev->reset.work);
463*4882a593Smuzhiyun }
464*4882a593Smuzhiyun
465*4882a593Smuzhiyun static const struct drm_sched_backend_ops panfrost_sched_ops = {
466*4882a593Smuzhiyun .dependency = panfrost_job_dependency,
467*4882a593Smuzhiyun .run_job = panfrost_job_run,
468*4882a593Smuzhiyun .timedout_job = panfrost_job_timedout,
469*4882a593Smuzhiyun .free_job = panfrost_job_free
470*4882a593Smuzhiyun };
471*4882a593Smuzhiyun
panfrost_job_irq_handler(int irq,void * data)472*4882a593Smuzhiyun static irqreturn_t panfrost_job_irq_handler(int irq, void *data)
473*4882a593Smuzhiyun {
474*4882a593Smuzhiyun struct panfrost_device *pfdev = data;
475*4882a593Smuzhiyun u32 status = job_read(pfdev, JOB_INT_STAT);
476*4882a593Smuzhiyun int j;
477*4882a593Smuzhiyun
478*4882a593Smuzhiyun dev_dbg(pfdev->dev, "jobslot irq status=%x\n", status);
479*4882a593Smuzhiyun
480*4882a593Smuzhiyun if (!status)
481*4882a593Smuzhiyun return IRQ_NONE;
482*4882a593Smuzhiyun
483*4882a593Smuzhiyun pm_runtime_mark_last_busy(pfdev->dev);
484*4882a593Smuzhiyun
485*4882a593Smuzhiyun for (j = 0; status; j++) {
486*4882a593Smuzhiyun u32 mask = MK_JS_MASK(j);
487*4882a593Smuzhiyun
488*4882a593Smuzhiyun if (!(status & mask))
489*4882a593Smuzhiyun continue;
490*4882a593Smuzhiyun
491*4882a593Smuzhiyun job_write(pfdev, JOB_INT_CLEAR, mask);
492*4882a593Smuzhiyun
493*4882a593Smuzhiyun if (status & JOB_INT_MASK_ERR(j)) {
494*4882a593Smuzhiyun enum panfrost_queue_status old_status;
495*4882a593Smuzhiyun
496*4882a593Smuzhiyun job_write(pfdev, JS_COMMAND_NEXT(j), JS_COMMAND_NOP);
497*4882a593Smuzhiyun
498*4882a593Smuzhiyun dev_err(pfdev->dev, "js fault, js=%d, status=%s, head=0x%x, tail=0x%x",
499*4882a593Smuzhiyun j,
500*4882a593Smuzhiyun panfrost_exception_name(pfdev, job_read(pfdev, JS_STATUS(j))),
501*4882a593Smuzhiyun job_read(pfdev, JS_HEAD_LO(j)),
502*4882a593Smuzhiyun job_read(pfdev, JS_TAIL_LO(j)));
503*4882a593Smuzhiyun
504*4882a593Smuzhiyun /*
505*4882a593Smuzhiyun * When the queue is being restarted we don't report
506*4882a593Smuzhiyun * faults directly to avoid races between the timeout
507*4882a593Smuzhiyun * and reset handlers. panfrost_scheduler_start() will
508*4882a593Smuzhiyun * call drm_sched_fault() after the queue has been
509*4882a593Smuzhiyun * started if status == FAULT_PENDING.
510*4882a593Smuzhiyun */
511*4882a593Smuzhiyun old_status = atomic_cmpxchg(&pfdev->js->queue[j].status,
512*4882a593Smuzhiyun PANFROST_QUEUE_STATUS_STARTING,
513*4882a593Smuzhiyun PANFROST_QUEUE_STATUS_FAULT_PENDING);
514*4882a593Smuzhiyun if (old_status == PANFROST_QUEUE_STATUS_ACTIVE)
515*4882a593Smuzhiyun drm_sched_fault(&pfdev->js->queue[j].sched);
516*4882a593Smuzhiyun }
517*4882a593Smuzhiyun
518*4882a593Smuzhiyun if (status & JOB_INT_MASK_DONE(j)) {
519*4882a593Smuzhiyun struct panfrost_job *job;
520*4882a593Smuzhiyun
521*4882a593Smuzhiyun spin_lock(&pfdev->js->job_lock);
522*4882a593Smuzhiyun job = pfdev->jobs[j];
523*4882a593Smuzhiyun /* Only NULL if job timeout occurred */
524*4882a593Smuzhiyun if (job) {
525*4882a593Smuzhiyun pfdev->jobs[j] = NULL;
526*4882a593Smuzhiyun
527*4882a593Smuzhiyun panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
528*4882a593Smuzhiyun panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
529*4882a593Smuzhiyun
530*4882a593Smuzhiyun dma_fence_signal_locked(job->done_fence);
531*4882a593Smuzhiyun pm_runtime_put_autosuspend(pfdev->dev);
532*4882a593Smuzhiyun }
533*4882a593Smuzhiyun spin_unlock(&pfdev->js->job_lock);
534*4882a593Smuzhiyun }
535*4882a593Smuzhiyun
536*4882a593Smuzhiyun status &= ~mask;
537*4882a593Smuzhiyun }
538*4882a593Smuzhiyun
539*4882a593Smuzhiyun return IRQ_HANDLED;
540*4882a593Smuzhiyun }
541*4882a593Smuzhiyun
panfrost_reset(struct work_struct * work)542*4882a593Smuzhiyun static void panfrost_reset(struct work_struct *work)
543*4882a593Smuzhiyun {
544*4882a593Smuzhiyun struct panfrost_device *pfdev = container_of(work,
545*4882a593Smuzhiyun struct panfrost_device,
546*4882a593Smuzhiyun reset.work);
547*4882a593Smuzhiyun unsigned long flags;
548*4882a593Smuzhiyun unsigned int i;
549*4882a593Smuzhiyun bool cookie;
550*4882a593Smuzhiyun
551*4882a593Smuzhiyun cookie = dma_fence_begin_signalling();
552*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++) {
553*4882a593Smuzhiyun /*
554*4882a593Smuzhiyun * We want pending timeouts to be handled before we attempt
555*4882a593Smuzhiyun * to stop the scheduler. If we don't do that and the timeout
556*4882a593Smuzhiyun * handler is in flight, it might have removed the bad job
557*4882a593Smuzhiyun * from the list, and we'll lose this job if the reset handler
558*4882a593Smuzhiyun * enters the critical section in panfrost_scheduler_stop()
559*4882a593Smuzhiyun * before the timeout handler.
560*4882a593Smuzhiyun *
561*4882a593Smuzhiyun * Timeout is set to MAX_SCHEDULE_TIMEOUT - 1 because we need
562*4882a593Smuzhiyun * something big enough to make sure the timer will not expire
563*4882a593Smuzhiyun * before we manage to stop the scheduler, but we can't use
564*4882a593Smuzhiyun * MAX_SCHEDULE_TIMEOUT because drm_sched_get_cleanup_job()
565*4882a593Smuzhiyun * considers that as 'timer is not running' and will dequeue
566*4882a593Smuzhiyun * the job without making sure the timeout handler is not
567*4882a593Smuzhiyun * running.
568*4882a593Smuzhiyun */
569*4882a593Smuzhiyun pfdev->js->queue[i].sched.timeout = MAX_SCHEDULE_TIMEOUT - 1;
570*4882a593Smuzhiyun cancel_delayed_work_sync(&pfdev->js->queue[i].sched.work_tdr);
571*4882a593Smuzhiyun panfrost_scheduler_stop(&pfdev->js->queue[i], NULL);
572*4882a593Smuzhiyun }
573*4882a593Smuzhiyun
574*4882a593Smuzhiyun /* All timers have been stopped, we can safely reset the pending state. */
575*4882a593Smuzhiyun atomic_set(&pfdev->reset.pending, 0);
576*4882a593Smuzhiyun
577*4882a593Smuzhiyun spin_lock_irqsave(&pfdev->js->job_lock, flags);
578*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++) {
579*4882a593Smuzhiyun if (pfdev->jobs[i]) {
580*4882a593Smuzhiyun pm_runtime_put_noidle(pfdev->dev);
581*4882a593Smuzhiyun panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
582*4882a593Smuzhiyun pfdev->jobs[i] = NULL;
583*4882a593Smuzhiyun }
584*4882a593Smuzhiyun }
585*4882a593Smuzhiyun spin_unlock_irqrestore(&pfdev->js->job_lock, flags);
586*4882a593Smuzhiyun
587*4882a593Smuzhiyun panfrost_device_reset(pfdev);
588*4882a593Smuzhiyun
589*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++)
590*4882a593Smuzhiyun panfrost_scheduler_start(&pfdev->js->queue[i]);
591*4882a593Smuzhiyun
592*4882a593Smuzhiyun dma_fence_end_signalling(cookie);
593*4882a593Smuzhiyun }
594*4882a593Smuzhiyun
panfrost_job_init(struct panfrost_device * pfdev)595*4882a593Smuzhiyun int panfrost_job_init(struct panfrost_device *pfdev)
596*4882a593Smuzhiyun {
597*4882a593Smuzhiyun struct panfrost_job_slot *js;
598*4882a593Smuzhiyun int ret, j, irq;
599*4882a593Smuzhiyun
600*4882a593Smuzhiyun INIT_WORK(&pfdev->reset.work, panfrost_reset);
601*4882a593Smuzhiyun
602*4882a593Smuzhiyun pfdev->js = js = devm_kzalloc(pfdev->dev, sizeof(*js), GFP_KERNEL);
603*4882a593Smuzhiyun if (!js)
604*4882a593Smuzhiyun return -ENOMEM;
605*4882a593Smuzhiyun
606*4882a593Smuzhiyun spin_lock_init(&js->job_lock);
607*4882a593Smuzhiyun
608*4882a593Smuzhiyun irq = platform_get_irq_byname(to_platform_device(pfdev->dev), "job");
609*4882a593Smuzhiyun if (irq <= 0)
610*4882a593Smuzhiyun return -ENODEV;
611*4882a593Smuzhiyun
612*4882a593Smuzhiyun ret = devm_request_irq(pfdev->dev, irq, panfrost_job_irq_handler,
613*4882a593Smuzhiyun IRQF_SHARED, KBUILD_MODNAME "-job", pfdev);
614*4882a593Smuzhiyun if (ret) {
615*4882a593Smuzhiyun dev_err(pfdev->dev, "failed to request job irq");
616*4882a593Smuzhiyun return ret;
617*4882a593Smuzhiyun }
618*4882a593Smuzhiyun
619*4882a593Smuzhiyun for (j = 0; j < NUM_JOB_SLOTS; j++) {
620*4882a593Smuzhiyun mutex_init(&js->queue[j].lock);
621*4882a593Smuzhiyun
622*4882a593Smuzhiyun js->queue[j].fence_context = dma_fence_context_alloc(1);
623*4882a593Smuzhiyun
624*4882a593Smuzhiyun ret = drm_sched_init(&js->queue[j].sched,
625*4882a593Smuzhiyun &panfrost_sched_ops,
626*4882a593Smuzhiyun 1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
627*4882a593Smuzhiyun "pan_js");
628*4882a593Smuzhiyun if (ret) {
629*4882a593Smuzhiyun dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
630*4882a593Smuzhiyun goto err_sched;
631*4882a593Smuzhiyun }
632*4882a593Smuzhiyun }
633*4882a593Smuzhiyun
634*4882a593Smuzhiyun panfrost_job_enable_interrupts(pfdev);
635*4882a593Smuzhiyun
636*4882a593Smuzhiyun return 0;
637*4882a593Smuzhiyun
638*4882a593Smuzhiyun err_sched:
639*4882a593Smuzhiyun for (j--; j >= 0; j--)
640*4882a593Smuzhiyun drm_sched_fini(&js->queue[j].sched);
641*4882a593Smuzhiyun
642*4882a593Smuzhiyun return ret;
643*4882a593Smuzhiyun }
644*4882a593Smuzhiyun
panfrost_job_fini(struct panfrost_device * pfdev)645*4882a593Smuzhiyun void panfrost_job_fini(struct panfrost_device *pfdev)
646*4882a593Smuzhiyun {
647*4882a593Smuzhiyun struct panfrost_job_slot *js = pfdev->js;
648*4882a593Smuzhiyun int j;
649*4882a593Smuzhiyun
650*4882a593Smuzhiyun job_write(pfdev, JOB_INT_MASK, 0);
651*4882a593Smuzhiyun
652*4882a593Smuzhiyun for (j = 0; j < NUM_JOB_SLOTS; j++) {
653*4882a593Smuzhiyun drm_sched_fini(&js->queue[j].sched);
654*4882a593Smuzhiyun mutex_destroy(&js->queue[j].lock);
655*4882a593Smuzhiyun }
656*4882a593Smuzhiyun
657*4882a593Smuzhiyun }
658*4882a593Smuzhiyun
panfrost_job_open(struct panfrost_file_priv * panfrost_priv)659*4882a593Smuzhiyun int panfrost_job_open(struct panfrost_file_priv *panfrost_priv)
660*4882a593Smuzhiyun {
661*4882a593Smuzhiyun struct panfrost_device *pfdev = panfrost_priv->pfdev;
662*4882a593Smuzhiyun struct panfrost_job_slot *js = pfdev->js;
663*4882a593Smuzhiyun struct drm_gpu_scheduler *sched;
664*4882a593Smuzhiyun int ret, i;
665*4882a593Smuzhiyun
666*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++) {
667*4882a593Smuzhiyun sched = &js->queue[i].sched;
668*4882a593Smuzhiyun ret = drm_sched_entity_init(&panfrost_priv->sched_entity[i],
669*4882a593Smuzhiyun DRM_SCHED_PRIORITY_NORMAL, &sched,
670*4882a593Smuzhiyun 1, NULL);
671*4882a593Smuzhiyun if (WARN_ON(ret))
672*4882a593Smuzhiyun return ret;
673*4882a593Smuzhiyun }
674*4882a593Smuzhiyun return 0;
675*4882a593Smuzhiyun }
676*4882a593Smuzhiyun
panfrost_job_close(struct panfrost_file_priv * panfrost_priv)677*4882a593Smuzhiyun void panfrost_job_close(struct panfrost_file_priv *panfrost_priv)
678*4882a593Smuzhiyun {
679*4882a593Smuzhiyun int i;
680*4882a593Smuzhiyun
681*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++)
682*4882a593Smuzhiyun drm_sched_entity_destroy(&panfrost_priv->sched_entity[i]);
683*4882a593Smuzhiyun }
684*4882a593Smuzhiyun
panfrost_job_is_idle(struct panfrost_device * pfdev)685*4882a593Smuzhiyun int panfrost_job_is_idle(struct panfrost_device *pfdev)
686*4882a593Smuzhiyun {
687*4882a593Smuzhiyun struct panfrost_job_slot *js = pfdev->js;
688*4882a593Smuzhiyun int i;
689*4882a593Smuzhiyun
690*4882a593Smuzhiyun for (i = 0; i < NUM_JOB_SLOTS; i++) {
691*4882a593Smuzhiyun /* If there are any jobs in the HW queue, we're not idle */
692*4882a593Smuzhiyun if (atomic_read(&js->queue[i].sched.hw_rq_count))
693*4882a593Smuzhiyun return false;
694*4882a593Smuzhiyun }
695*4882a593Smuzhiyun
696*4882a593Smuzhiyun return true;
697*4882a593Smuzhiyun }
698