xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/msm/adreno/adreno_gpu.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include <linux/ascii85.h>
#include <linux/interconnect.h>
#include <linux/qcom_scm.h>
#include <linux/kernel.h>
#include <linux/of_address.h>
#include <linux/pm_opp.h>
#include <linux/slab.h>
#include <linux/soc/qcom/mdt_loader.h>
#include <soc/qcom/ocmem.h>
#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

static bool zap_available = true;

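/*
 * Illustrative (board specific, not authoritative) zap-shader node that
 * zap_shader_load_mdt() below expects under the GPU node; the memory-region
 * phandle and firmware-name value are placeholders:
 *
 *	zap-shader {
 *		memory-region = <&zap_shader_mem>;
 *		firmware-name = "qcom/<board>/a6xx_zap.mbn";
 *	};
 */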
static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname,
		u32 pasid)
{
	struct device *dev = &gpu->pdev->dev;
	const struct firmware *fw;
	const char *signed_fwname = NULL;
	struct device_node *np, *mem_np;
	struct resource r;
	phys_addr_t mem_phys;
	ssize_t mem_size;
	void *mem_region = NULL;
	int ret;

	if (!IS_ENABLED(CONFIG_ARCH_QCOM)) {
		zap_available = false;
		return -EINVAL;
	}

	np = of_get_child_by_name(dev->of_node, "zap-shader");
	if (!np) {
		zap_available = false;
		return -ENODEV;
	}

	mem_np = of_parse_phandle(np, "memory-region", 0);
	of_node_put(np);
	if (!mem_np) {
		zap_available = false;
		return -EINVAL;
	}

	ret = of_address_to_resource(mem_np, 0, &r);
	of_node_put(mem_np);
	if (ret)
		return ret;

	mem_phys = r.start;

	/*
	 * Check for a firmware-name property.  This is the new scheme
	 * to handle firmware that may be signed with device specific
	 * keys, allowing us to have a different zap fw path for different
	 * devices.
	 *
	 * If the firmware-name property is found, we bypass the
	 * adreno_request_fw() mechanism, because we don't need to handle
	 * the /lib/firmware/qcom/... vs /lib/firmware/... case.
	 *
	 * If the firmware-name property is not found, for backwards
	 * compatibility we fall back to the fwname from the gpulist
	 * table.
	 */
	of_property_read_string_index(np, "firmware-name", 0, &signed_fwname);
	if (signed_fwname) {
		fwname = signed_fwname;
		ret = request_firmware_direct(&fw, fwname, gpu->dev->dev);
		if (ret)
			fw = ERR_PTR(ret);
	} else if (fwname) {
		/* Request the MDT file from the default location: */
		fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
	} else {
		/*
		 * For new targets, we require the firmware-name property,
		 * if a zap-shader is required, rather than falling back
		 * to a firmware name specified in gpulist.
		 *
		 * Because the firmware is signed with a (potentially)
		 * device specific key, having the name come from gpulist
		 * was a bad idea, and is only provided for backwards
		 * compatibility for older targets.
		 */
		return -ENODEV;
	}

	if (IS_ERR(fw)) {
		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
		return PTR_ERR(fw);
	}

	/* Figure out how much memory we need */
	mem_size = qcom_mdt_get_size(fw);
	if (mem_size < 0) {
		ret = mem_size;
		goto out;
	}

	if (mem_size > resource_size(&r)) {
		DRM_DEV_ERROR(dev,
			"memory region is too small to load the MDT\n");
		ret = -E2BIG;
		goto out;
	}

	/* Allocate memory for the firmware image */
	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
	if (!mem_region) {
		ret = -ENOMEM;
		goto out;
	}

	/*
	 * Load the rest of the MDT
	 *
	 * Note that we could be dealing with two different paths, since
	 * with upstream linux-firmware it would be in a qcom/ subdir..
	 * adreno_request_fw() handles this, but qcom_mdt_load() does
	 * not.  But since we've already gotten through adreno_request_fw()
	 * we know which of the two cases it is:
	 */
	if (signed_fwname || (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY)) {
		ret = qcom_mdt_load(dev, fw, fwname, pasid,
				mem_region, mem_phys, mem_size, NULL);
	} else {
		char *newname;

		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);

		ret = qcom_mdt_load(dev, fw, newname, pasid,
				mem_region, mem_phys, mem_size, NULL);
		kfree(newname);
	}
	if (ret)
		goto out;

	/* Send the image to the secure world */
	ret = qcom_scm_pas_auth_and_reset(pasid);

	/*
	 * If the scm call returns -EOPNOTSUPP we assume that this target
	 * doesn't need/support the zap shader so quietly fail
	 */
	if (ret == -EOPNOTSUPP)
		zap_available = false;
	else if (ret)
		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");

out:
	if (mem_region)
		memunmap(mem_region);

	release_firmware(fw);

	return ret;
}

int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct platform_device *pdev = gpu->pdev;

	/* Short cut if we determine the zap shader isn't available/needed */
	if (!zap_available)
		return -ENODEV;

	/* We need SCM to be able to load the firmware */
	if (!qcom_scm_is_available()) {
		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
		return -EPROBE_DEFER;
	}

	return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
}

struct msm_gem_address_space *
adreno_iommu_create_address_space(struct msm_gpu *gpu,
		struct platform_device *pdev)
{
	struct iommu_domain *iommu;
	struct msm_mmu *mmu;
	struct msm_gem_address_space *aspace;
	u64 start, size;

	iommu = iommu_domain_alloc(&platform_bus_type);
	if (!iommu)
		return NULL;

	mmu = msm_iommu_new(&pdev->dev, iommu);

	/*
	 * Use the aperture start or SZ_16M, whichever is greater. This will
	 * ensure that we align with the allocated pagetable range while still
	 * allowing room in the lower 32 bits for GMEM and whatnot
	 */
	start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
	size = iommu->geometry.aperture_end - start + 1;

	aspace = msm_gem_address_space_create(mmu, "gpu",
		start & GENMASK_ULL(48, 0), size);

	if (IS_ERR(aspace) && !IS_ERR(mmu))
		mmu->funcs->destroy(mmu);

	return aspace;
}

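/*
 * These parameters are exposed to userspace through the MSM_GET_PARAM
 * ioctl.  A minimal sketch of a userspace query (illustrative only; field
 * names per the msm_drm.h UAPI):
 *
 *	struct drm_msm_param req = {
 *		.pipe  = MSM_PIPE_3D0,
 *		.param = MSM_PARAM_CHIP_ID,
 *	};
 *	drmIoctl(fd, DRM_IOCTL_MSM_GET_PARAM, &req);
 *	// req.value now holds the packed chip id
 */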
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_GMEM_BASE:
		*value = !adreno_is_a650(adreno_gpu) ? 0x100000 : 0;
		return 0;
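	/*
	 * CHIP_ID is packed one byte per revision field:
	 * core[31:24] | major[23:16] | minor[15:8] | patchid[7:0].
	 */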
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	case MSM_PARAM_MAX_FREQ:
		*value = adreno_gpu->base.fast_rate;
		return 0;
	case MSM_PARAM_TIMESTAMP:
		if (adreno_gpu->funcs->get_timestamp) {
			int ret;

			pm_runtime_get_sync(&gpu->pdev->dev);
			ret = adreno_gpu->funcs->get_timestamp(gpu, value);
			pm_runtime_put_autosuspend(&gpu->pdev->dev);

			return ret;
		}
		return -EINVAL;
	case MSM_PARAM_NR_RINGS:
		*value = gpu->nr_rings;
		return 0;
	case MSM_PARAM_PP_PGTABLE:
		*value = 0;
		return 0;
	case MSM_PARAM_FAULTS:
		*value = gpu->global_faults;
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

const struct firmware *
adreno_request_fw(struct adreno_gpu *adreno_gpu, const char *fwname)
{
	struct drm_device *drm = adreno_gpu->base.dev;
	const struct firmware *fw = NULL;
	char *newname;
	int ret;

	newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
	if (!newname)
		return ERR_PTR(-ENOMEM);

	/*
	 * Try first to load from qcom/$fwfile using a direct load (to avoid
	 * a potential timeout waiting for usermode helper)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_NEW)) {

		ret = request_firmware_direct(&fw, newname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s from new location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_NEW;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	/*
	 * Then try the legacy location without qcom/ prefix
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_LEGACY)) {

		ret = request_firmware_direct(&fw, fwname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s from legacy location\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_LEGACY;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				fwname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	/*
	 * Finally fall back to request_firmware() for cases where the
	 * usermode helper is needed (I think mainly android)
	 */
	if ((adreno_gpu->fwloc == FW_LOCATION_UNKNOWN) ||
	    (adreno_gpu->fwloc == FW_LOCATION_HELPER)) {

		ret = request_firmware(&fw, newname, drm->dev);
		if (!ret) {
			DRM_DEV_INFO(drm->dev, "loaded %s with helper\n",
				newname);
			adreno_gpu->fwloc = FW_LOCATION_HELPER;
			goto out;
		} else if (adreno_gpu->fwloc != FW_LOCATION_UNKNOWN) {
			DRM_DEV_ERROR(drm->dev, "failed to load %s: %d\n",
				newname, ret);
			fw = ERR_PTR(ret);
			goto out;
		}
	}

	DRM_DEV_ERROR(drm->dev, "failed to load %s\n", fwname);
	fw = ERR_PTR(-ENOENT);
out:
	kfree(newname);
	return fw;
}

int adreno_load_fw(struct adreno_gpu *adreno_gpu)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++) {
		const struct firmware *fw;

		if (!adreno_gpu->info->fw[i])
			continue;

		/* Skip if the firmware has already been loaded */
		if (adreno_gpu->fw[i])
			continue;

		fw = adreno_request_fw(adreno_gpu, adreno_gpu->info->fw[i]);
		if (IS_ERR(fw))
			return PTR_ERR(fw);

		adreno_gpu->fw[i] = fw;
	}

	return 0;
}

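/*
 * Note: adreno_fw_create_bo() below deliberately skips the first dword of
 * the firmware file; only fw->data[4..] is copied into the GPU-visible,
 * read-only buffer object.
 */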
struct drm_gem_object *adreno_fw_create_bo(struct msm_gpu *gpu,
		const struct firmware *fw, u64 *iova)
{
	struct drm_gem_object *bo;
	void *ptr;

	ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
		MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);

	if (IS_ERR(ptr))
		return ERR_CAST(ptr);

	memcpy(ptr, &fw->data[4], fw->size - 4);

	msm_gem_put_vaddr(bo);

	return bo;
}

int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret, i;

	DBG("%s", gpu->name);

	ret = adreno_load_fw(adreno_gpu);
	if (ret)
		return ret;

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		if (!ring)
			continue;

		ring->cur = ring->start;
		ring->next = ring->start;

		/* reset completed fence seqno: */
		ring->memptrs->fence = ring->fctx->completed_fence;
		ring->memptrs->rptr = 0;
	}

	return 0;
}

/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
static uint32_t get_rptr(struct adreno_gpu *adreno_gpu,
		struct msm_ringbuffer *ring)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	return gpu->funcs->get_rptr(gpu, ring);
}

struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu)
{
	return gpu->rb[0];
}

void adreno_recover(struct msm_gpu *gpu)
{
	struct drm_device *dev = gpu->dev;
	int ret;

	// XXX pm-runtime??  we *need* the device to be off after this
	// so maybe continuing to call ->pm_suspend/resume() is better?

	gpu->funcs->pm_suspend(gpu);
	gpu->funcs->pm_resume(gpu);

	ret = msm_gpu_hw_init(gpu);
	if (ret) {
		DRM_DEV_ERROR(dev->dev, "gpu hw init failed: %d\n", ret);
		/* hmm, oh well? */
	}
}

void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg)
{
	uint32_t wptr;

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/*
	 * Mask wptr value that we calculate to fit in the HW range. This is
	 * to account for the possibility that the last command fit exactly into
	 * the ringbuffer and rb->next hasn't wrapped to zero yet
	 */
	wptr = get_wptr(ring);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, reg, wptr);
}

bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(ring);

	/* wait for CP to drain ringbuffer: */
	if (!spin_until(get_rptr(adreno_gpu, ring) == wptr))
		return true;

	/* TODO maybe we need to reset GPU here to recover from hang? */
	DRM_ERROR("%s: timeout waiting to drain ringbuffer %d rptr/wptr = %X/%X\n",
		gpu->name, ring->id, get_rptr(adreno_gpu, ring), wptr);

	return false;
}

int adreno_gpu_state_get(struct msm_gpu *gpu, struct msm_gpu_state *state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i, count = 0;

	kref_init(&state->ref);

	ktime_get_real_ts64(&state->time);

	for (i = 0; i < gpu->nr_rings; i++) {
		int size = 0, j;

		state->ring[i].fence = gpu->rb[i]->memptrs->fence;
		state->ring[i].iova = gpu->rb[i]->iova;
		state->ring[i].seqno = gpu->rb[i]->seqno;
		state->ring[i].rptr = get_rptr(adreno_gpu, gpu->rb[i]);
		state->ring[i].wptr = get_wptr(gpu->rb[i]);

		/* Copy at least 'wptr' dwords of the data */
		size = state->ring[i].wptr;

		/* After wptr find the last non zero dword to save space */
		for (j = state->ring[i].wptr; j < MSM_GPU_RINGBUFFER_SZ >> 2; j++)
			if (gpu->rb[i]->start[j])
				size = j + 1;

		if (size) {
			state->ring[i].data = kvmalloc(size << 2, GFP_KERNEL);
			if (state->ring[i].data) {
				memcpy(state->ring[i].data, gpu->rb[i]->start, size << 2);
				state->ring[i].data_size = size << 2;
			}
		}
	}

	/* Some targets prefer to collect their own registers */
	if (!adreno_gpu->registers)
		return 0;

	/* Count the number of registers */
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2)
		count += adreno_gpu->registers[i + 1] -
			adreno_gpu->registers[i] + 1;

	state->registers = kcalloc(count * 2, sizeof(u32), GFP_KERNEL);
	if (state->registers) {
		int pos = 0;

		for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
			u32 start = adreno_gpu->registers[i];
			u32 end   = adreno_gpu->registers[i + 1];
			u32 addr;

			for (addr = start; addr <= end; addr++) {
				state->registers[pos++] = addr;
				state->registers[pos++] = gpu_read(gpu, addr);
			}
		}

		state->nr_registers = count;
	}

	return 0;
}

void adreno_gpu_state_destroy(struct msm_gpu_state *state)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(state->ring); i++)
		kvfree(state->ring[i].data);

	for (i = 0; state->bos && i < state->nr_bos; i++)
		kvfree(state->bos[i].data);

	kfree(state->bos);
	kfree(state->comm);
	kfree(state->cmd);
	kfree(state->registers);
}

static void adreno_gpu_state_kref_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);

	adreno_gpu_state_destroy(state);
	kfree(state);
}

int adreno_gpu_state_put(struct msm_gpu_state *state)
{
	if (IS_ERR_OR_NULL(state))
		return 1;

	return kref_put(&state->ref, adreno_gpu_state_kref_destroy);
}

#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)

static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
{
	void *buf;
	size_t buf_itr = 0, buffer_size;
	char out[ASCII85_BUFSZ];
	long l;
	int i;

	if (!src || !len)
		return NULL;

	l = ascii85_encode_len(len);

	/*
	 * Ascii85 outputs either a 5 byte string or a 1 byte string. So we
	 * account for the worst case of 5 bytes per dword plus the 1 for '\0'
	 */
	buffer_size = (l * 5) + 1;

	buf = kvmalloc(buffer_size, GFP_KERNEL);
	if (!buf)
		return NULL;

	for (i = 0; i < l; i++)
		buf_itr += scnprintf(buf + buf_itr, buffer_size - buf_itr, "%s",
				ascii85_encode(src[i], out));

	return buf;
}

/* len is expected to be in bytes */
static void adreno_show_object(struct drm_printer *p, void **ptr, int len,
		bool *encoded)
{
	if (!*ptr || !len)
		return;

	if (!*encoded) {
		long datalen, i;
		u32 *buf = *ptr;

		/*
		 * Only dump the non-zero part of the buffer - rarely will
		 * any data completely fill the entire allocated size of
		 * the buffer.
		 */
		for (datalen = 0, i = 0; i < len >> 2; i++)
			if (buf[i])
				datalen = ((i + 1) << 2);

		/*
		 * If we reach here, then the originally captured binary buffer
		 * will be replaced with the ascii85 encoded string
		 */
		*ptr = adreno_gpu_ascii85_encode(buf, datalen);

		kvfree(buf);

		*encoded = true;
	}

	if (!*ptr)
		return;

	drm_puts(p, "    data: !!ascii85 |\n");
	drm_puts(p, "     ");

	drm_puts(p, *ptr);

	drm_puts(p, "\n");
}

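/*
 * Emit the captured state as the YAML-flavored text used for the
 * devcoredump / debugfs dump.  An illustrative fragment of the output
 * (values are examples only):
 *
 *	ringbuffer:
 *	  - id: 0
 *	    iova: 0x0000000001000000
 *	    ...
 *	    data: !!ascii85 |
 *	     <ascii85 payload>
 */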
void adreno_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		struct drm_printer *p)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	if (IS_ERR_OR_NULL(state))
		return;

	drm_printf(p, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	drm_printf(p, "rbbm-status: 0x%08x\n", state->rbbm_status);

	drm_puts(p, "ringbuffer:\n");

	for (i = 0; i < gpu->nr_rings; i++) {
		drm_printf(p, "  - id: %d\n", i);
		drm_printf(p, "    iova: 0x%016llx\n", state->ring[i].iova);
		drm_printf(p, "    last-fence: %d\n", state->ring[i].seqno);
		drm_printf(p, "    retired-fence: %d\n", state->ring[i].fence);
		drm_printf(p, "    rptr: %d\n", state->ring[i].rptr);
		drm_printf(p, "    wptr: %d\n", state->ring[i].wptr);
		drm_printf(p, "    size: %d\n", MSM_GPU_RINGBUFFER_SZ);

		adreno_show_object(p, &state->ring[i].data,
			state->ring[i].data_size, &state->ring[i].encoded);
	}

	if (state->bos) {
		drm_puts(p, "bos:\n");

		for (i = 0; i < state->nr_bos; i++) {
			drm_printf(p, "  - iova: 0x%016llx\n",
				state->bos[i].iova);
			drm_printf(p, "    size: %zd\n", state->bos[i].size);

			adreno_show_object(p, &state->bos[i].data,
				state->bos[i].size, &state->bos[i].encoded);
		}
	}

	if (state->nr_registers) {
		drm_puts(p, "registers:\n");

		for (i = 0; i < state->nr_registers; i++) {
			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				state->registers[i * 2] << 2,
				state->registers[(i * 2) + 1]);
		}
	}
}
#endif

/* Dump common gpu status and scratch registers on any hang, to make
 * the hangcheck logs more useful.  The scratch registers seem always
 * safe to read when GPU has hung (unlike some other regs, depending
 * on how the GPU hung), and they are useful to match up to cmdstream
 * dumps when debugging hangs:
 */
void adreno_dump_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	for (i = 0; i < gpu->nr_rings; i++) {
		struct msm_ringbuffer *ring = gpu->rb[i];

		printk("rb %d: fence:    %d/%d\n", i,
			ring->memptrs->fence,
			ring->seqno);

		printk("rptr:     %d\n", get_rptr(adreno_gpu, ring));
		printk("rb wptr:  %d\n", get_wptr(ring));
	}
}

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	if (!adreno_gpu->registers)
		return;

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}

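/*
 * Worked example for ring_freewords() below, assuming a ring of 8 words:
 * if rptr == wptr the ring is empty and (rptr + 7 - wptr) % 8 = 7 words are
 * free; one slot is always left unused so a full ring (free == 0, wptr one
 * word behind rptr) stays distinguishable from an empty one.
 */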
static uint32_t ring_freewords(struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu);
	uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2;
	/* Use ring->next to calculate free size */
	uint32_t wptr = ring->next - ring->start;
	uint32_t rptr = get_rptr(adreno_gpu, ring);
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
{
	if (spin_until(ring_freewords(ring) >= ndwords))
		DRM_DEV_ERROR(ring->gpu->dev->dev,
			"timeout waiting for space in ringbuffer %d\n",
			ring->id);
}

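/*
 * Illustrative legacy power-level node parsed by
 * adreno_get_legacy_pwrlevels() below (frequencies are board specific
 * placeholders; the 27 MHz entry is the bogus value that gets skipped):
 *
 *	qcom,gpu-pwrlevels {
 *		compatible = "qcom,gpu-pwrlevels";
 *		qcom,gpu-pwrlevel@0 {
 *			qcom,gpu-freq = <450000000>;
 *		};
 *		qcom,gpu-pwrlevel@1 {
 *			qcom,gpu-freq = <27000000>;
 *		};
 *	};
 */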
/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
static int adreno_get_legacy_pwrlevels(struct device *dev)
{
	struct device_node *child, *node;
	int ret;

	node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
	if (!node) {
		DRM_DEV_DEBUG(dev, "Could not find the GPU powerlevels\n");
		return -ENXIO;
	}

	for_each_child_of_node(node, child) {
		unsigned int val;

		ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
		if (ret)
			continue;

		/*
		 * Skip the intentionally bogus clock value found at the bottom
		 * of most legacy frequency tables
		 */
		if (val != 27000000)
			dev_pm_opp_add(dev, val, 0);
	}

	of_node_put(node);

	return 0;
}

static void adreno_get_pwrlevels(struct device *dev,
		struct msm_gpu *gpu)
{
	unsigned long freq = ULONG_MAX;
	struct dev_pm_opp *opp;
	int ret;

	gpu->fast_rate = 0;

	/* You down with OPP? */
	if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
		ret = adreno_get_legacy_pwrlevels(dev);
	else {
		ret = dev_pm_opp_of_add_table(dev);
		if (ret)
			DRM_DEV_ERROR(dev, "Unable to set the OPP table\n");
	}

	if (!ret) {
		/* Find the fastest defined rate */
		opp = dev_pm_opp_find_freq_floor(dev, &freq);
		if (!IS_ERR(opp)) {
			gpu->fast_rate = freq;
			dev_pm_opp_put(opp);
		}
	}

	if (!gpu->fast_rate) {
		dev_warn(dev,
			"Could not find a clock rate. Using a reasonable default\n");
		/* Pick a suitably safe clock speed for any target */
		gpu->fast_rate = 200000000;
	}

	DBG("fast_rate=%u, slow_rate=27000000", gpu->fast_rate);
}

int adreno_gpu_ocmem_init(struct device *dev, struct adreno_gpu *adreno_gpu,
			  struct adreno_ocmem *adreno_ocmem)
{
	struct ocmem_buf *ocmem_hdl;
	struct ocmem *ocmem;

	ocmem = of_get_ocmem(dev);
	if (IS_ERR(ocmem)) {
		if (PTR_ERR(ocmem) == -ENODEV) {
			/*
			 * Return success since either the ocmem property was
			 * not specified in device tree, or ocmem support is
			 * not compiled into the kernel.
			 */
			return 0;
		}

		return PTR_ERR(ocmem);
	}

	ocmem_hdl = ocmem_allocate(ocmem, OCMEM_GRAPHICS, adreno_gpu->gmem);
	if (IS_ERR(ocmem_hdl))
		return PTR_ERR(ocmem_hdl);

	adreno_ocmem->ocmem = ocmem;
	adreno_ocmem->base = ocmem_hdl->addr;
	adreno_ocmem->hdl = ocmem_hdl;
	adreno_gpu->gmem = ocmem_hdl->len;

	return 0;
}

void adreno_gpu_ocmem_cleanup(struct adreno_ocmem *adreno_ocmem)
{
	if (adreno_ocmem && adreno_ocmem->base)
		ocmem_free(adreno_ocmem->ocmem, OCMEM_GRAPHICS,
			   adreno_ocmem->hdl);
}

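/*
 * Illustrative interconnect wiring consumed by adreno_gpu_init() below; the
 * provider/endpoint phandles are SoC specific (shown as placeholders), only
 * the path names are fixed:
 *
 *	interconnects = <&gfx_noc_master &ddr_slave>,
 *			<&gfx_noc_master &ocmem_slave>;
 *	interconnect-names = "gfx-mem", "ocmem";
 */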
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu,
		const struct adreno_gpu_funcs *funcs, int nr_rings)
{
	struct device *dev = &pdev->dev;
	struct adreno_platform_config *config = dev->platform_data;
	struct msm_gpu_config adreno_gpu_config  = { 0 };
	struct msm_gpu *gpu = &adreno_gpu->base;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	adreno_gpu_config.ioname = "kgsl_3d0_reg_memory";

	adreno_gpu_config.nr_rings = nr_rings;

	adreno_get_pwrlevels(dev, gpu);

	pm_runtime_set_autosuspend_delay(dev,
		adreno_gpu->info->inactive_period);
	pm_runtime_use_autosuspend(dev);
	pm_runtime_enable(dev);

	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, &adreno_gpu_config);
	if (ret)
		return ret;

	/*
	 * The legacy case, before "interconnect-names", only has a
	 * single interconnect path which is equivalent to "gfx-mem"
	 */
	if (!of_find_property(dev->of_node, "interconnect-names", NULL)) {
		gpu->icc_path = of_icc_get(dev, NULL);
	} else {
		gpu->icc_path = of_icc_get(dev, "gfx-mem");
		gpu->ocmem_icc_path = of_icc_get(dev, "ocmem");
	}

	if (IS_ERR(gpu->icc_path)) {
		ret = PTR_ERR(gpu->icc_path);
		gpu->icc_path = NULL;
		return ret;
	}

	if (IS_ERR(gpu->ocmem_icc_path)) {
		ret = PTR_ERR(gpu->ocmem_icc_path);
		gpu->ocmem_icc_path = NULL;
		/* allow -ENODATA, ocmem icc is optional */
		if (ret != -ENODATA)
			return ret;
	}

	return 0;
}

void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
{
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct msm_drm_private *priv = gpu->dev->dev_private;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
		release_firmware(adreno_gpu->fw[i]);

	if (pm_runtime_enabled(&priv->gpu_pdev->dev))
		pm_runtime_disable(&priv->gpu_pdev->dev);

	msm_gpu_cleanup(&adreno_gpu->base);

	icc_put(gpu->icc_path);
	icc_put(gpu->ocmem_icc_path);
}