// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * Implementation of the dummy job execution workaround for the GPU hang issue.
 */

#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <mali_kbase_dummy_job_wa.h>

#include <linux/firmware.h>
#include <linux/delay.h>

#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa"
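
/*
 * Rough call sequence (a sketch; the exact call sites live elsewhere in
 * the driver, e.g. in the probe and power management paths):
 *
 *   kbase_dummy_job_wa_load(kbdev)           - once, with fw_load_lock held
 *   kbase_dummy_job_wa_execute(kbdev, cores) - after power up and MMU init
 *   kbase_dummy_job_wa_cleanup(kbdev)        - on device teardown
 */

/*
 * Layout of the WA binary: a wa_header at offset 0, a wa_v2_info at
 * header->info_offset, then a chain of wa_blob records linked through
 * their blob_offset fields, terminated by a zero offset. All structures
 * are packed and read in place, with no endianness conversion.
 */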

struct wa_header {
	u16 signature;
	u16 version;
	u32 info_offset;
} __packed;

struct wa_v2_info {
	u64 jc;
	u32 js;
	u32 blob_offset;
	u64 flags;
} __packed;

struct wa_blob {
	u64 base;
	u32 size;
	u32 map_flags;
	u32 payload_offset;
	u32 blob_offset;
} __packed;

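/*
 * Bounds check: true iff [off, off + sz) lies within the image delimited
 * by base and end. off and sz come from untrusted fields in the WA
 * binary, so the check guards explicitly against negative and wrapping
 * values instead of relying on a single signed/unsigned subtraction.
 */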
static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz)
{
	return off >= 0 && off <= end - base && sz <= (size_t)(end - base - off);
}

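/*
 * Poll a register until any of the given bits is set, for at most
 * 100 * 10us. Returns the masked value, which is zero on timeout.
 */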
static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits)
{
	int loop;
	const int timeout = 100;
	u32 val;

	for (loop = 0; loop < timeout; loop++) {
		val = kbase_reg_read(kbdev, offset);
		if (val & bits)
			break;
		udelay(10);
	}

	if (loop == timeout) {
		dev_err(kbdev->dev,
			"Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
			(unsigned long)offset, (unsigned long)bits,
			(unsigned long)val);
	}

	return (val & bits);
}

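/*
 * Poll a register until all of the given bits are set (or, if 'set' is
 * false, until they are all clear), for at most 100 * 10us. Returns 0 on
 * success or -ETIMEDOUT.
 */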
static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set)
{
	int loop;
	const int timeout = 100;
	u32 val;
	u32 target = 0;

	if (set)
		target = bits;

	for (loop = 0; loop < timeout; loop++) {
		val = kbase_reg_read(kbdev, offset);
		if ((val & bits) == target)
			break;

		udelay(10);
	}

	if (loop == timeout) {
		dev_err(kbdev->dev,
			"Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
			(unsigned long)offset, (unsigned long)bits,
			(unsigned long)val);
		return -ETIMEDOUT;
	}

	return 0;
}

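/*
 * Submit one dummy job: program the slot's next-job registers with the
 * job chain address, core affinity and address space, kick the slot,
 * then poll JOB_IRQ_RAWSTAT for either the done bit (bit 'slot') or the
 * failure bit (bit '16 + slot') and acknowledge whichever fired.
 */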
static inline int run_job(struct kbase_device *kbdev, int as, int slot,
			  u64 cores, u64 jc)
{
	u32 done;

	/* setup job */
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO),
			jc & U32_MAX);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI),
			jc >> 32);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO),
			cores & U32_MAX);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI),
			cores >> 32);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT),
			JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as);

	/* go */
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT),
			JS_COMMAND_START);

	/* wait for the slot to finish (done, error) */
	done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT),
			(1ul << (16 + slot)) | (1ul << slot));
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done);

	if (done != (1ul << slot)) {
		dev_err(kbdev->dev,
			"Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n",
			slot, (unsigned long long)cores,
			(unsigned long)done);
		dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n",
			kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS)));

		return -EFAULT;
	}

	return 0;
}

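/*
 * Mask all GPU and job IRQs, power up the requested cores, run the dummy
 * job (once over all cores, or once per core when the WA flags demand
 * serialization), optionally power the cores back down, then restore the
 * IRQ masks. Completion is polled rather than interrupt driven.
 */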
/* To be called after power up & MMU init, but before everything else */
int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
{
	int as;
	int slot;
	u64 jc;
	int failed = 0;
	int runs = 0;
	u32 old_gpu_mask;
	u32 old_job_mask;

	if (!kbdev)
		return -EFAULT;

	if (!kbdev->dummy_job_wa.ctx)
		return -EFAULT;

	as = kbdev->dummy_job_wa.ctx->as_nr;
	slot = kbdev->dummy_job_wa.slot;
	jc = kbdev->dummy_job_wa.jc;

	/* mask off all but MMU IRQs */
	old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
	old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK));
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);

	/* power up requested cores */
	kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX));
	kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32));

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
		/* wait for power-ups */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
		if (cores >> 32)
			wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
	}

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
		int i;

		/* do for each requested core */
		for (i = 0; i < sizeof(cores) * 8; i++) {
			u64 affinity;

			affinity = 1ull << i;

			if (!(cores & affinity))
				continue;

			if (run_job(kbdev, as, slot, affinity, jc))
				failed++;
			runs++;
		}

	} else {
		if (run_job(kbdev, as, slot, cores, jc))
			failed++;
		runs++;
	}

	if (kbdev->dummy_job_wa.flags &
	    KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) {
		/* power off shader cores (to reduce any dynamic leakage) */
		kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX));
		kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32));

		/* wait for power off complete */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false);
		wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false);
		if (cores >> 32) {
			wait(kbdev, SHADER_READY_HI, (cores >> 32), false);
			wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false);
		}
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
	}

	/* restore IRQ masks */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask);

	if (failed)
		dev_err(kbdev->dev,
			"WA complete with %d failures out of %d runs\n", failed,
			runs);

	return failed ? -EFAULT : 0;
}

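/*
 * Read-only sysfs attribute exposing the WA's job slot and flags; via
 * DEVICE_ATTR_RO below this typically appears as a dummy_job_wa_info
 * file in the device's sysfs directory.
 */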
static ssize_t dummy_job_wa_info_show(struct device * const dev,
				      struct device_attribute * const attr,
				      char * const buf)
{
	struct kbase_device *const kbdev = dev_get_drvdata(dev);
	int err;

	if (!kbdev || !kbdev->dummy_job_wa.ctx)
		return -ENODEV;

	err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n",
			kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags);

	return err;
}

static DEVICE_ATTR_RO(dummy_job_wa_info);

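/*
 * The blob is only needed on GPUs affected by hardware issue TTRX_3485;
 * platforms matching the "arm,juno" compatible are explicitly excluded.
 */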
static bool wa_blob_load_needed(struct kbase_device *kbdev)
{
	if (of_machine_is_compatible("arm,juno"))
		return false;

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485))
		return true;

	return false;
}

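/*
 * Load and validate the WA binary, create a privileged kernel context,
 * map each blob at the fixed GPU VA it requests and copy its payload in,
 * then schedule the context and publish it in kbdev->dummy_job_wa.
 */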
int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
{
	const struct firmware *firmware;
	static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME;
	const u32 signature = 0x4157;
	const u32 version = 2;
	const u8 *fw_end;
	const u8 *fw;
	const struct wa_header *header;
	const struct wa_v2_info *v2_info;
	u32 blob_offset;
	int err;
	struct kbase_context *kctx;

	/* Calls to this function are inherently asynchronous with respect
	 * to MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	lockdep_assert_held(&kbdev->fw_load_lock);

	if (!wa_blob_load_needed(kbdev))
		return 0;

	/* load the wa */
	err = request_firmware(&firmware, wa_name, kbdev->dev);

	if (err) {
		dev_err(kbdev->dev, "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, "
			"Part number DC-06002 or contact support-mali@arm.com - driver probe will fail");
		return -ENODEV;
	}

	kctx = kbase_create_context(kbdev, true,
				    BASE_CONTEXT_CREATE_FLAG_NONE, 0,
				    NULL);

	if (!kctx) {
		dev_err(kbdev->dev, "Failed to create WA context\n");
		goto no_ctx;
	}

	fw = firmware->data;
	fw_end = fw + firmware->size;

	dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n",
		firmware->size);

	if (!in_range(fw, fw_end, 0, sizeof(*header))) {
		dev_err(kbdev->dev, "WA too small\n");
		goto bad_fw;
	}

	header = (const struct wa_header *)(fw + 0);

	if (header->signature != signature) {
		dev_err(kbdev->dev, "WA signature failure: 0x%lx\n",
			(unsigned long)header->signature);
		goto bad_fw;
	}

	if (header->version != version) {
		dev_err(kbdev->dev, "WA version 0x%lx not supported\n",
			(unsigned long)header->version);
		goto bad_fw;
	}

	if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) {
		dev_err(kbdev->dev, "WA info offset out of bounds\n");
		goto bad_fw;
	}

	v2_info = (const struct wa_v2_info *)(fw + header->info_offset);

	if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) {
		dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n",
			(unsigned long long)v2_info->flags);
		goto bad_fw;
	}

	kbdev->dummy_job_wa.slot = v2_info->js;
	kbdev->dummy_job_wa.jc = v2_info->jc;
	kbdev->dummy_job_wa.flags = v2_info->flags;

	blob_offset = v2_info->blob_offset;

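	/*
	 * Walk the blob chain. Each record is bounds-checked before use,
	 * mapped at the fixed GPU VA it names, and its payload copied in
	 * through a temporary kernel mapping.
	 */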
	while (blob_offset) {
		const struct wa_blob *blob;
		size_t nr_pages;
		u64 flags;
		u64 gpu_va;
		struct kbase_va_region *va_region;

		if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) {
			dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n",
				(unsigned long)blob_offset);
			goto bad_fw;
		}

		blob = (const struct wa_blob *)(fw + blob_offset);
		if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) {
			dev_err(kbdev->dev, "Payload out-of-bounds\n");
			goto bad_fw;
		}

		gpu_va = blob->base;
		if (PAGE_ALIGN(gpu_va) != gpu_va) {
			dev_err(kbdev->dev, "blob not page aligned\n");
			goto bad_fw;
		}
		nr_pages = PFN_UP(blob->size);
		flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED;

		va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
					    &gpu_va, mmu_sync_info);

		if (!va_region) {
			dev_err(kbdev->dev, "Failed to allocate for blob\n");
		} else {
			struct kbase_vmap_struct vmap = { 0 };
			const u8 *payload;
			void *dst;

			/* copy the payload */
			payload = fw + blob->payload_offset;

			dst = kbase_vmap(kctx,
					 va_region->start_pfn << PAGE_SHIFT,
					 nr_pages << PAGE_SHIFT, &vmap);

			if (dst) {
				memcpy(dst, payload, blob->size);
				kbase_vunmap(kctx, &vmap);
			} else {
				dev_err(kbdev->dev,
					"Failed to copy payload\n");
			}
		}
		blob_offset = blob->blob_offset; /* follow chain */
	}

	release_firmware(firmware);

	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	kbdev->dummy_job_wa.ctx = kctx;

	err = sysfs_create_file(&kbdev->dev->kobj,
				&dev_attr_dummy_job_wa_info.attr);
	if (err)
		dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n");

	return 0;

bad_fw:
	kbase_destroy_context(kctx);
no_ctx:
	release_firmware(firmware);
	return -EFAULT;
}

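/*
 * Tear down the WA state: remove the sysfs file, clear the published
 * context pointer, then release and destroy the privileged context.
 */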
void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev)
{
	struct kbase_context *wa_ctx;

	/* return if the dummy job has not been loaded */
	if (!kbdev->dummy_job_wa_loaded)
		return;

	/* Can be safely called even if the file wasn't created on probe */
	sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr);

	wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx);
	WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL);
	/* make this write visible before we tear down the ctx */
	smp_mb();

	if (wa_ctx) {
		kbasep_js_release_privileged_ctx(kbdev, wa_ctx);
		kbase_destroy_context(wa_ctx);
	}
}