xref: /OK3568_Linux_fs/kernel/drivers/gpu/drm/vc4/vc4_perfmon.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Broadcom
 */
5*4882a593Smuzhiyun 
/**
 * DOC: VC4 V3D performance monitor module
 *
 * The V3D block provides 16 hardware counters which can count various events.
 */
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include "vc4_drv.h"
13*4882a593Smuzhiyun #include "vc4_regs.h"
14*4882a593Smuzhiyun 
/*
 * Bounds passed to idr_alloc() when a perfmon is created. The range starts
 * at 1, so 0 is never handed out as a perfmon ID.
 */
#define VC4_PERFMONID_MIN	1
#define VC4_PERFMONID_MAX	U32_MAX
17*4882a593Smuzhiyun 
vc4_perfmon_get(struct vc4_perfmon * perfmon)18*4882a593Smuzhiyun void vc4_perfmon_get(struct vc4_perfmon *perfmon)
19*4882a593Smuzhiyun {
20*4882a593Smuzhiyun 	if (perfmon)
21*4882a593Smuzhiyun 		refcount_inc(&perfmon->refcnt);
22*4882a593Smuzhiyun }
23*4882a593Smuzhiyun 
vc4_perfmon_put(struct vc4_perfmon * perfmon)24*4882a593Smuzhiyun void vc4_perfmon_put(struct vc4_perfmon *perfmon)
25*4882a593Smuzhiyun {
26*4882a593Smuzhiyun 	if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
27*4882a593Smuzhiyun 		kfree(perfmon);
28*4882a593Smuzhiyun }
29*4882a593Smuzhiyun 
vc4_perfmon_start(struct vc4_dev * vc4,struct vc4_perfmon * perfmon)30*4882a593Smuzhiyun void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
31*4882a593Smuzhiyun {
32*4882a593Smuzhiyun 	unsigned int i;
33*4882a593Smuzhiyun 	u32 mask;
34*4882a593Smuzhiyun 
35*4882a593Smuzhiyun 	if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
36*4882a593Smuzhiyun 		return;
37*4882a593Smuzhiyun 
38*4882a593Smuzhiyun 	for (i = 0; i < perfmon->ncounters; i++)
39*4882a593Smuzhiyun 		V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun 	mask = GENMASK(perfmon->ncounters - 1, 0);
42*4882a593Smuzhiyun 	V3D_WRITE(V3D_PCTRC, mask);
43*4882a593Smuzhiyun 	V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
44*4882a593Smuzhiyun 	vc4->active_perfmon = perfmon;
45*4882a593Smuzhiyun }
46*4882a593Smuzhiyun 
/**
 * vc4_perfmon_stop() - stop the active perfmon
 * @vc4: device whose V3D registers are accessed
 * @perfmon: perfmon to stop; must be the one in @vc4->active_perfmon
 * @capture: when true, add the current hardware counter values to
 *	     @perfmon->counters before the counters are switched off
 *
 * Warns once and does nothing if no perfmon is active or if @perfmon is not
 * the active one.
 */
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture)
{
	unsigned int idx;

	if (WARN_ON_ONCE(!vc4->active_perfmon ||
			 perfmon != vc4->active_perfmon))
		return;

	if (capture) {
		/* Accumulate, so values add up across start/stop cycles. */
		for (idx = 0; idx < perfmon->ncounters; idx++)
			perfmon->counters[idx] += V3D_READ(V3D_PCTR(idx));
	}

	/* Clear the enable register (including the V3D_PCTRE_EN bit). */
	V3D_WRITE(V3D_PCTRE, 0);

	vc4->active_perfmon = NULL;
}
64*4882a593Smuzhiyun 
vc4_perfmon_find(struct vc4_file * vc4file,int id)65*4882a593Smuzhiyun struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
66*4882a593Smuzhiyun {
67*4882a593Smuzhiyun 	struct vc4_perfmon *perfmon;
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun 	mutex_lock(&vc4file->perfmon.lock);
70*4882a593Smuzhiyun 	perfmon = idr_find(&vc4file->perfmon.idr, id);
71*4882a593Smuzhiyun 	vc4_perfmon_get(perfmon);
72*4882a593Smuzhiyun 	mutex_unlock(&vc4file->perfmon.lock);
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun 	return perfmon;
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun 
vc4_perfmon_open_file(struct vc4_file * vc4file)77*4882a593Smuzhiyun void vc4_perfmon_open_file(struct vc4_file *vc4file)
78*4882a593Smuzhiyun {
79*4882a593Smuzhiyun 	mutex_init(&vc4file->perfmon.lock);
80*4882a593Smuzhiyun 	idr_init(&vc4file->perfmon.idr);
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun 
/*
 * idr_for_each() callback: drop one reference on the perfmon stored in the
 * IDR slot. @id and @data are unused. Always returns 0.
 */
static int vc4_perfmon_idr_del(int id, void *elem, void *data)
{
	vc4_perfmon_put(elem);

	return 0;
}
91*4882a593Smuzhiyun 
vc4_perfmon_close_file(struct vc4_file * vc4file)92*4882a593Smuzhiyun void vc4_perfmon_close_file(struct vc4_file *vc4file)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun 	mutex_lock(&vc4file->perfmon.lock);
95*4882a593Smuzhiyun 	idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
96*4882a593Smuzhiyun 	idr_destroy(&vc4file->perfmon.idr);
97*4882a593Smuzhiyun 	mutex_unlock(&vc4file->perfmon.lock);
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun 
vc4_perfmon_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)100*4882a593Smuzhiyun int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
101*4882a593Smuzhiyun 			     struct drm_file *file_priv)
102*4882a593Smuzhiyun {
103*4882a593Smuzhiyun 	struct vc4_dev *vc4 = to_vc4_dev(dev);
104*4882a593Smuzhiyun 	struct vc4_file *vc4file = file_priv->driver_priv;
105*4882a593Smuzhiyun 	struct drm_vc4_perfmon_create *req = data;
106*4882a593Smuzhiyun 	struct vc4_perfmon *perfmon;
107*4882a593Smuzhiyun 	unsigned int i;
108*4882a593Smuzhiyun 	int ret;
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun 	if (!vc4->v3d) {
111*4882a593Smuzhiyun 		DRM_DEBUG("Creating perfmon no VC4 V3D probed\n");
112*4882a593Smuzhiyun 		return -ENODEV;
113*4882a593Smuzhiyun 	}
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	/* Number of monitored counters cannot exceed HW limits. */
116*4882a593Smuzhiyun 	if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
117*4882a593Smuzhiyun 	    !req->ncounters)
118*4882a593Smuzhiyun 		return -EINVAL;
119*4882a593Smuzhiyun 
120*4882a593Smuzhiyun 	/* Make sure all events are valid. */
121*4882a593Smuzhiyun 	for (i = 0; i < req->ncounters; i++) {
122*4882a593Smuzhiyun 		if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
123*4882a593Smuzhiyun 			return -EINVAL;
124*4882a593Smuzhiyun 	}
125*4882a593Smuzhiyun 
126*4882a593Smuzhiyun 	perfmon = kzalloc(struct_size(perfmon, counters, req->ncounters),
127*4882a593Smuzhiyun 			  GFP_KERNEL);
128*4882a593Smuzhiyun 	if (!perfmon)
129*4882a593Smuzhiyun 		return -ENOMEM;
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun 	for (i = 0; i < req->ncounters; i++)
132*4882a593Smuzhiyun 		perfmon->events[i] = req->events[i];
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun 	perfmon->ncounters = req->ncounters;
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun 	refcount_set(&perfmon->refcnt, 1);
137*4882a593Smuzhiyun 
138*4882a593Smuzhiyun 	mutex_lock(&vc4file->perfmon.lock);
139*4882a593Smuzhiyun 	ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
140*4882a593Smuzhiyun 			VC4_PERFMONID_MAX, GFP_KERNEL);
141*4882a593Smuzhiyun 	mutex_unlock(&vc4file->perfmon.lock);
142*4882a593Smuzhiyun 
143*4882a593Smuzhiyun 	if (ret < 0) {
144*4882a593Smuzhiyun 		kfree(perfmon);
145*4882a593Smuzhiyun 		return ret;
146*4882a593Smuzhiyun 	}
147*4882a593Smuzhiyun 
148*4882a593Smuzhiyun 	req->id = ret;
149*4882a593Smuzhiyun 	return 0;
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun 
vc4_perfmon_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)152*4882a593Smuzhiyun int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
153*4882a593Smuzhiyun 			      struct drm_file *file_priv)
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun 	struct vc4_dev *vc4 = to_vc4_dev(dev);
156*4882a593Smuzhiyun 	struct vc4_file *vc4file = file_priv->driver_priv;
157*4882a593Smuzhiyun 	struct drm_vc4_perfmon_destroy *req = data;
158*4882a593Smuzhiyun 	struct vc4_perfmon *perfmon;
159*4882a593Smuzhiyun 
160*4882a593Smuzhiyun 	if (!vc4->v3d) {
161*4882a593Smuzhiyun 		DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n");
162*4882a593Smuzhiyun 		return -ENODEV;
163*4882a593Smuzhiyun 	}
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun 	mutex_lock(&vc4file->perfmon.lock);
166*4882a593Smuzhiyun 	perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
167*4882a593Smuzhiyun 	mutex_unlock(&vc4file->perfmon.lock);
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	if (!perfmon)
170*4882a593Smuzhiyun 		return -EINVAL;
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun 	vc4_perfmon_put(perfmon);
173*4882a593Smuzhiyun 	return 0;
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun 
vc4_perfmon_get_values_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)176*4882a593Smuzhiyun int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
177*4882a593Smuzhiyun 				 struct drm_file *file_priv)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun 	struct vc4_dev *vc4 = to_vc4_dev(dev);
180*4882a593Smuzhiyun 	struct vc4_file *vc4file = file_priv->driver_priv;
181*4882a593Smuzhiyun 	struct drm_vc4_perfmon_get_values *req = data;
182*4882a593Smuzhiyun 	struct vc4_perfmon *perfmon;
183*4882a593Smuzhiyun 	int ret;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun 	if (!vc4->v3d) {
186*4882a593Smuzhiyun 		DRM_DEBUG("Getting perfmon no VC4 V3D probed\n");
187*4882a593Smuzhiyun 		return -ENODEV;
188*4882a593Smuzhiyun 	}
189*4882a593Smuzhiyun 
190*4882a593Smuzhiyun 	mutex_lock(&vc4file->perfmon.lock);
191*4882a593Smuzhiyun 	perfmon = idr_find(&vc4file->perfmon.idr, req->id);
192*4882a593Smuzhiyun 	vc4_perfmon_get(perfmon);
193*4882a593Smuzhiyun 	mutex_unlock(&vc4file->perfmon.lock);
194*4882a593Smuzhiyun 
195*4882a593Smuzhiyun 	if (!perfmon)
196*4882a593Smuzhiyun 		return -EINVAL;
197*4882a593Smuzhiyun 
198*4882a593Smuzhiyun 	if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
199*4882a593Smuzhiyun 			 perfmon->ncounters * sizeof(u64)))
200*4882a593Smuzhiyun 		ret = -EFAULT;
201*4882a593Smuzhiyun 	else
202*4882a593Smuzhiyun 		ret = 0;
203*4882a593Smuzhiyun 
204*4882a593Smuzhiyun 	vc4_perfmon_put(perfmon);
205*4882a593Smuzhiyun 	return ret;
206*4882a593Smuzhiyun }
207