// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
 *
 * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
 *
 * This program is free software and is provided to you under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation, and any use by you of this program is subject to the terms
 * of such GNU license.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, you can access it online at
 * http://www.gnu.org/licenses/gpl-2.0.html.
 *
 */

/*
 * Implementation of the dummy job execution workaround for the GPU hang issue.
 */

#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <mali_kbase_dummy_job_wa.h>

#include <linux/firmware.h>
#include <linux/delay.h>

#define DUMMY_JOB_WA_BINARY_NAME "valhall-1691526.wa"

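/*
 * On-disk layout of the workaround binary: a wa_header at offset 0 points
 * (via info_offset) to a wa_v2_info, which in turn points (via blob_offset)
 * to the first wa_blob of a singly linked chain; each blob's blob_offset
 * names the next link and a value of 0 terminates the chain. All offsets are
 * relative to the start of the file, as parsed by kbase_dummy_job_wa_load()
 * below.
 */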
struct wa_header {
	u16 signature;
	u16 version;
	u32 info_offset;
} __packed;

struct wa_v2_info {
	u64 jc;
	u32 js;
	u32 blob_offset;
	u64 flags;
} __packed;

struct wa_blob {
	u64 base;
	u32 size;
	u32 map_flags;
	u32 payload_offset;
	u32 blob_offset;
} __packed;

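/* Return true if the @sz bytes at @off lie entirely within [base, end). */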
static bool in_range(const u8 *base, const u8 *end, off_t off, size_t sz)
{
	/* Compare in size_t so that a negative (end - base - off) cannot be
	 * promoted to a huge unsigned value and wrongly pass the check.
	 */
	return off >= 0 && (size_t)off <= (size_t)(end - base) &&
	       sz <= (size_t)(end - base) - (size_t)off;
}

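/*
 * Poll @offset until any of @bits reads back as set, or ~1ms elapses.
 * Returns the subset of @bits last seen (zero on timeout).
 */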
static u32 wait_any(struct kbase_device *kbdev, off_t offset, u32 bits)
{
	int loop;
	const int timeout = 100;
	u32 val;

	for (loop = 0; loop < timeout; loop++) {
		val = kbase_reg_read(kbdev, offset);
		if (val & bits)
			break;
		udelay(10);
	}

	if (loop == timeout) {
		dev_err(kbdev->dev,
			"Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
			(unsigned long)offset, (unsigned long)bits,
			(unsigned long)val);
	}

	return (val & bits);
}

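/*
 * Poll @offset until all of @bits are set (@set == true) or clear
 * (@set == false). Returns 0 on success or -ETIMEDOUT after ~1ms.
 */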
static int wait(struct kbase_device *kbdev, off_t offset, u32 bits, bool set)
{
	int loop;
	const int timeout = 100;
	u32 val;
	u32 target = 0;

	if (set)
		target = bits;

	for (loop = 0; loop < timeout; loop++) {
		val = kbase_reg_read(kbdev, offset);
		if ((val & bits) == target)
			break;

		udelay(10);
	}

	if (loop == timeout) {
		dev_err(kbdev->dev,
			"Timeout reading register 0x%lx, bits 0x%lx, last read was 0x%lx\n",
			(unsigned long)offset, (unsigned long)bits,
			(unsigned long)val);
		return -ETIMEDOUT;
	}

	return 0;
}

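/*
 * Submit one dummy job to @slot with the given address space and core
 * affinity, then busy-wait on JOB_IRQ_RAWSTAT: bit @slot signals completion
 * and bit (16 + @slot) signals failure.
 */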
static inline int run_job(struct kbase_device *kbdev, int as, int slot,
			  u64 cores, u64 jc)
{
	u32 done;

	/* setup job */
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_LO),
			jc & U32_MAX);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_HEAD_NEXT_HI),
			jc >> 32);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_LO),
			cores & U32_MAX);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_AFFINITY_NEXT_HI),
			cores >> 32);
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_CONFIG_NEXT),
			JS_CONFIG_DISABLE_DESCRIPTOR_WR_BK | as);

	/* go */
	kbase_reg_write(kbdev, JOB_SLOT_REG(slot, JS_COMMAND_NEXT),
			JS_COMMAND_START);

	/* wait for the slot to finish (done, error) */
	done = wait_any(kbdev, JOB_CONTROL_REG(JOB_IRQ_RAWSTAT),
			(1ul << (16 + slot)) | (1ul << slot));
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_CLEAR), done);

	if (done != (1ul << slot)) {
		dev_err(kbdev->dev,
			"Failed to run WA job on slot %d cores 0x%llx: done 0x%lx\n",
			slot, (unsigned long long)cores,
			(unsigned long)done);
		dev_err(kbdev->dev, "JS_STATUS on failure: 0x%x\n",
			kbase_reg_read(kbdev, JOB_SLOT_REG(slot, JS_STATUS)));

		return -EFAULT;
	}

	return 0;
}

/* To be called after power up & MMU init, but before everything else */
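/*
 * With all GPU and job IRQs masked, power up the requested shader cores and
 * run the dummy job either once per core (KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE)
 * or once across the whole set, polling the raw IRQ status rather than
 * taking interrupts.
 */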
int kbase_dummy_job_wa_execute(struct kbase_device *kbdev, u64 cores)
{
	int as;
	int slot;
	u64 jc;
	int failed = 0;
	int runs = 0;
	u32 old_gpu_mask;
	u32 old_job_mask;

	if (!kbdev)
		return -EFAULT;

	if (!kbdev->dummy_job_wa.ctx)
		return -EFAULT;

	as = kbdev->dummy_job_wa.ctx->as_nr;
	slot = kbdev->dummy_job_wa.slot;
	jc = kbdev->dummy_job_wa.jc;

	/* mask off all but MMU IRQs */
	old_gpu_mask = kbase_reg_read(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK));
	old_job_mask = kbase_reg_read(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK));
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), 0);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), 0);

	/* power up requested cores */
	kbase_reg_write(kbdev, SHADER_PWRON_LO, (cores & U32_MAX));
	kbase_reg_write(kbdev, SHADER_PWRON_HI, (cores >> 32));

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_WAIT_POWERUP) {
		/* wait for power-ups */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), true);
		if (cores >> 32)
			wait(kbdev, SHADER_READY_HI, (cores >> 32), true);
	}

	if (kbdev->dummy_job_wa.flags & KBASE_DUMMY_JOB_WA_FLAG_SERIALIZE) {
		int i;

		/* do for each requested core */
		for (i = 0; i < sizeof(cores) * 8; i++) {
			u64 affinity;

			affinity = 1ull << i;

			if (!(cores & affinity))
				continue;

			if (run_job(kbdev, as, slot, affinity, jc))
				failed++;
			runs++;
		}

	} else {
		if (run_job(kbdev, as, slot, cores, jc))
			failed++;
		runs++;
	}

	if (kbdev->dummy_job_wa.flags &
			KBASE_DUMMY_JOB_WA_FLAG_LOGICAL_SHADER_POWER) {
		/* power off shader cores (to reduce any dynamic leakage) */
		kbase_reg_write(kbdev, SHADER_PWROFF_LO, (cores & U32_MAX));
		kbase_reg_write(kbdev, SHADER_PWROFF_HI, (cores >> 32));

		/* wait for power off complete */
		wait(kbdev, SHADER_READY_LO, (cores & U32_MAX), false);
		wait(kbdev, SHADER_PWRTRANS_LO, (cores & U32_MAX), false);
		if (cores >> 32) {
			wait(kbdev, SHADER_READY_HI, (cores >> 32), false);
			wait(kbdev, SHADER_PWRTRANS_HI, (cores >> 32), false);
		}
		kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_CLEAR), U32_MAX);
	}

	/* restore IRQ masks */
	kbase_reg_write(kbdev, GPU_CONTROL_REG(GPU_IRQ_MASK), old_gpu_mask);
	kbase_reg_write(kbdev, JOB_CONTROL_REG(JOB_IRQ_MASK), old_job_mask);

	if (failed)
		dev_err(kbdev->dev,
			"WA complete with %d failures out of %d runs\n", failed,
			runs);

	return failed ? -EFAULT : 0;
}

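/* sysfs read handler: report the job slot and flags the WA was loaded with */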
static ssize_t dummy_job_wa_info_show(struct device * const dev,
		struct device_attribute * const attr, char * const buf)
{
	struct kbase_device *const kbdev = dev_get_drvdata(dev);
	int err;

	if (!kbdev || !kbdev->dummy_job_wa.ctx)
		return -ENODEV;

	err = scnprintf(buf, PAGE_SIZE, "slot %u flags %llx\n",
			kbdev->dummy_job_wa.slot, kbdev->dummy_job_wa.flags);

	return err;
}

static DEVICE_ATTR_RO(dummy_job_wa_info);

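/*
 * The WA blob is only needed on GPUs affected by BASE_HW_ISSUE_TTRX_3485;
 * Juno boards are excluded regardless of the GPU fitted.
 */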
static bool wa_blob_load_needed(struct kbase_device *kbdev)
{
	if (of_machine_is_compatible("arm,juno"))
		return false;

	if (kbase_hw_has_issue(kbdev, BASE_HW_ISSUE_TTRX_3485))
		return true;

	return false;
}

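/*
 * Load DUMMY_JOB_WA_BINARY_NAME from the firmware search path, validate its
 * header and v2 info block, then map each blob at the fixed GPU VA it names
 * in a dedicated privileged context and copy the payload in. On success the
 * context is kept in kbdev->dummy_job_wa for kbase_dummy_job_wa_execute().
 */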
int kbase_dummy_job_wa_load(struct kbase_device *kbdev)
{
	const struct firmware *firmware;
	static const char wa_name[] = DUMMY_JOB_WA_BINARY_NAME;
	const u32 signature = 0x4157;
	const u32 version = 2;
	const u8 *fw_end;
	const u8 *fw;
	const struct wa_header *header;
	const struct wa_v2_info *v2_info;
	u32 blob_offset;
	int err;
	struct kbase_context *kctx;

	/* Calls to this function are inherently asynchronous, with respect to
	 * MMU operations.
	 */
	const enum kbase_caller_mmu_sync_info mmu_sync_info = CALLER_MMU_ASYNC;

	lockdep_assert_held(&kbdev->fw_load_lock);

	if (!wa_blob_load_needed(kbdev))
		return 0;

	/* load the wa */
	err = request_firmware(&firmware, wa_name, kbdev->dev);

	if (err) {
		dev_err(kbdev->dev, "WA blob missing. Please refer to the Arm Mali DDK Valhall Release Notes, "
				    "Part number DC-06002 or contact support-mali@arm.com - driver probe will fail\n");
		return -ENODEV;
	}

	kctx = kbase_create_context(kbdev, true,
				    BASE_CONTEXT_CREATE_FLAG_NONE, 0,
				    NULL);

	if (!kctx) {
		dev_err(kbdev->dev, "Failed to create WA context\n");
		goto no_ctx;
	}

	fw = firmware->data;
	fw_end = fw + firmware->size;

	dev_dbg(kbdev->dev, "Loaded firmware of size %zu bytes\n",
		firmware->size);

	if (!in_range(fw, fw_end, 0, sizeof(*header))) {
		dev_err(kbdev->dev, "WA too small\n");
		goto bad_fw;
	}

	header = (const struct wa_header *)(fw + 0);

	if (header->signature != signature) {
		dev_err(kbdev->dev, "WA signature failure: 0x%lx\n",
			(unsigned long)header->signature);
		goto bad_fw;
	}

	if (header->version != version) {
		dev_err(kbdev->dev, "WA version 0x%lx not supported\n",
			(unsigned long)header->version);
		goto bad_fw;
	}

	if (!in_range(fw, fw_end, header->info_offset, sizeof(*v2_info))) {
		dev_err(kbdev->dev, "WA info offset out of bounds\n");
		goto bad_fw;
	}

	v2_info = (const struct wa_v2_info *)(fw + header->info_offset);

	if (v2_info->flags & ~KBASE_DUMMY_JOB_WA_FLAGS) {
		dev_err(kbdev->dev, "Unsupported WA flag(s): 0x%llx\n",
			(unsigned long long)v2_info->flags);
		goto bad_fw;
	}

	kbdev->dummy_job_wa.slot = v2_info->js;
	kbdev->dummy_job_wa.jc = v2_info->jc;
	kbdev->dummy_job_wa.flags = v2_info->flags;

	blob_offset = v2_info->blob_offset;

	while (blob_offset) {
		const struct wa_blob *blob;
		size_t nr_pages;
		u64 flags;
		u64 gpu_va;
		struct kbase_va_region *va_region;

		if (!in_range(fw, fw_end, blob_offset, sizeof(*blob))) {
			dev_err(kbdev->dev, "Blob offset out-of-range: 0x%lx\n",
				(unsigned long)blob_offset);
			goto bad_fw;
		}

		blob = (const struct wa_blob *)(fw + blob_offset);
		if (!in_range(fw, fw_end, blob->payload_offset, blob->size)) {
			dev_err(kbdev->dev, "Payload out-of-bounds\n");
			goto bad_fw;
		}

		gpu_va = blob->base;
		if (PAGE_ALIGN(gpu_va) != gpu_va) {
			dev_err(kbdev->dev, "blob not page aligned\n");
			goto bad_fw;
		}
		nr_pages = PFN_UP(blob->size);
		flags = blob->map_flags | BASE_MEM_FLAG_MAP_FIXED;

		va_region = kbase_mem_alloc(kctx, nr_pages, nr_pages, 0, &flags,
					    &gpu_va, mmu_sync_info);

		if (!va_region) {
			dev_err(kbdev->dev, "Failed to allocate for blob\n");
		} else {
			struct kbase_vmap_struct vmap = { 0 };
			const u8 *payload;
			void *dst;

			/* copy the payload into the GPU-visible mapping */
			payload = fw + blob->payload_offset;

			dst = kbase_vmap(kctx,
					 va_region->start_pfn << PAGE_SHIFT,
					 nr_pages << PAGE_SHIFT, &vmap);

			if (dst) {
				memcpy(dst, payload, blob->size);
				kbase_vunmap(kctx, &vmap);
			} else {
				dev_err(kbdev->dev,
					"Failed to copy payload\n");
			}

		}
		blob_offset = blob->blob_offset; /* follow chain */
	}

	release_firmware(firmware);

	kbasep_js_schedule_privileged_ctx(kbdev, kctx);

	kbdev->dummy_job_wa.ctx = kctx;

	err = sysfs_create_file(&kbdev->dev->kobj,
				&dev_attr_dummy_job_wa_info.attr);
	if (err)
		dev_err(kbdev->dev, "SysFS file creation for dummy job wa failed\n");

	return 0;

bad_fw:
	kbase_destroy_context(kctx);
no_ctx:
	release_firmware(firmware);
	return -EFAULT;
}

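/*
 * Release the privileged WA context and remove the sysfs file created at
 * load time; safe to call even when the WA was never loaded.
 */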
void kbase_dummy_job_wa_cleanup(struct kbase_device *kbdev)
{
	struct kbase_context *wa_ctx;

	/* return if the dummy job has not been loaded */
	if (!kbdev->dummy_job_wa_loaded)
		return;

	/* Can be safely called even if the file wasn't created on probe */
	sysfs_remove_file(&kbdev->dev->kobj, &dev_attr_dummy_job_wa_info.attr);

	wa_ctx = READ_ONCE(kbdev->dummy_job_wa.ctx);
	WRITE_ONCE(kbdev->dummy_job_wa.ctx, NULL);
	/* make this write visible before we tear down the ctx */
	smp_mb();

	if (wa_ctx) {
		kbasep_js_release_privileged_ctx(kbdev, wa_ctx);
		kbase_destroy_context(wa_ctx);
	}
}