1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun * Copyright (C) 2018 Broadcom
4*4882a593Smuzhiyun */
5*4882a593Smuzhiyun
6*4882a593Smuzhiyun /**
7*4882a593Smuzhiyun * DOC: VC4 V3D performance monitor module
8*4882a593Smuzhiyun *
9*4882a593Smuzhiyun * The V3D block provides 16 hardware counters which can count various events.
10*4882a593Smuzhiyun */
11*4882a593Smuzhiyun
12*4882a593Smuzhiyun #include "vc4_drv.h"
13*4882a593Smuzhiyun #include "vc4_regs.h"
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #define VC4_PERFMONID_MIN 1
16*4882a593Smuzhiyun #define VC4_PERFMONID_MAX U32_MAX
17*4882a593Smuzhiyun
vc4_perfmon_get(struct vc4_perfmon * perfmon)18*4882a593Smuzhiyun void vc4_perfmon_get(struct vc4_perfmon *perfmon)
19*4882a593Smuzhiyun {
20*4882a593Smuzhiyun if (perfmon)
21*4882a593Smuzhiyun refcount_inc(&perfmon->refcnt);
22*4882a593Smuzhiyun }
23*4882a593Smuzhiyun
vc4_perfmon_put(struct vc4_perfmon * perfmon)24*4882a593Smuzhiyun void vc4_perfmon_put(struct vc4_perfmon *perfmon)
25*4882a593Smuzhiyun {
26*4882a593Smuzhiyun if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
27*4882a593Smuzhiyun kfree(perfmon);
28*4882a593Smuzhiyun }
29*4882a593Smuzhiyun
vc4_perfmon_start(struct vc4_dev * vc4,struct vc4_perfmon * perfmon)30*4882a593Smuzhiyun void vc4_perfmon_start(struct vc4_dev *vc4, struct vc4_perfmon *perfmon)
31*4882a593Smuzhiyun {
32*4882a593Smuzhiyun unsigned int i;
33*4882a593Smuzhiyun u32 mask;
34*4882a593Smuzhiyun
35*4882a593Smuzhiyun if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
36*4882a593Smuzhiyun return;
37*4882a593Smuzhiyun
38*4882a593Smuzhiyun for (i = 0; i < perfmon->ncounters; i++)
39*4882a593Smuzhiyun V3D_WRITE(V3D_PCTRS(i), perfmon->events[i]);
40*4882a593Smuzhiyun
41*4882a593Smuzhiyun mask = GENMASK(perfmon->ncounters - 1, 0);
42*4882a593Smuzhiyun V3D_WRITE(V3D_PCTRC, mask);
43*4882a593Smuzhiyun V3D_WRITE(V3D_PCTRE, V3D_PCTRE_EN | mask);
44*4882a593Smuzhiyun vc4->active_perfmon = perfmon;
45*4882a593Smuzhiyun }
46*4882a593Smuzhiyun
/**
 * vc4_perfmon_stop() - disable the V3D counters and deactivate a perfmon
 * @vc4: VC4 device whose V3D registers are accessed
 * @perfmon: perfmon expected to be the currently active one
 * @capture: when true, accumulate the hardware counter values into @perfmon
 *
 * Warns once and returns without touching the hardware when no perfmon is
 * active or when @perfmon is not the active one. Otherwise, if @capture is
 * set, the value read from each V3D_PCTR register is added to the matching
 * entry of @perfmon->counters. Finally 0 is written to V3D_PCTRE and the
 * device's active perfmon is cleared.
 */
void vc4_perfmon_stop(struct vc4_dev *vc4, struct vc4_perfmon *perfmon,
		      bool capture)
{
	unsigned int idx;
	unsigned int nr;

	if (WARN_ON_ONCE(!vc4->active_perfmon ||
			 perfmon != vc4->active_perfmon))
		return;

	/* Without capture no counter register is read at all. */
	nr = capture ? perfmon->ncounters : 0;
	for (idx = 0; idx < nr; idx++)
		perfmon->counters[idx] += V3D_READ(V3D_PCTR(idx));

	V3D_WRITE(V3D_PCTRE, 0);
	vc4->active_perfmon = NULL;
}
64*4882a593Smuzhiyun
vc4_perfmon_find(struct vc4_file * vc4file,int id)65*4882a593Smuzhiyun struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
66*4882a593Smuzhiyun {
67*4882a593Smuzhiyun struct vc4_perfmon *perfmon;
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun mutex_lock(&vc4file->perfmon.lock);
70*4882a593Smuzhiyun perfmon = idr_find(&vc4file->perfmon.idr, id);
71*4882a593Smuzhiyun vc4_perfmon_get(perfmon);
72*4882a593Smuzhiyun mutex_unlock(&vc4file->perfmon.lock);
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun return perfmon;
75*4882a593Smuzhiyun }
76*4882a593Smuzhiyun
vc4_perfmon_open_file(struct vc4_file * vc4file)77*4882a593Smuzhiyun void vc4_perfmon_open_file(struct vc4_file *vc4file)
78*4882a593Smuzhiyun {
79*4882a593Smuzhiyun mutex_init(&vc4file->perfmon.lock);
80*4882a593Smuzhiyun idr_init(&vc4file->perfmon.idr);
81*4882a593Smuzhiyun }
82*4882a593Smuzhiyun
/*
 * idr_for_each() callback: drop the reference held on the perfmon stored in
 * an IDR slot. @id and @data are unused. Always returns 0.
 */
static int vc4_perfmon_idr_del(int id, void *elem, void *data)
{
	/* The IDR stores struct vc4_perfmon pointers; no cast is required. */
	vc4_perfmon_put(elem);

	return 0;
}
91*4882a593Smuzhiyun
vc4_perfmon_close_file(struct vc4_file * vc4file)92*4882a593Smuzhiyun void vc4_perfmon_close_file(struct vc4_file *vc4file)
93*4882a593Smuzhiyun {
94*4882a593Smuzhiyun mutex_lock(&vc4file->perfmon.lock);
95*4882a593Smuzhiyun idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
96*4882a593Smuzhiyun idr_destroy(&vc4file->perfmon.idr);
97*4882a593Smuzhiyun mutex_unlock(&vc4file->perfmon.lock);
98*4882a593Smuzhiyun }
99*4882a593Smuzhiyun
vc4_perfmon_create_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)100*4882a593Smuzhiyun int vc4_perfmon_create_ioctl(struct drm_device *dev, void *data,
101*4882a593Smuzhiyun struct drm_file *file_priv)
102*4882a593Smuzhiyun {
103*4882a593Smuzhiyun struct vc4_dev *vc4 = to_vc4_dev(dev);
104*4882a593Smuzhiyun struct vc4_file *vc4file = file_priv->driver_priv;
105*4882a593Smuzhiyun struct drm_vc4_perfmon_create *req = data;
106*4882a593Smuzhiyun struct vc4_perfmon *perfmon;
107*4882a593Smuzhiyun unsigned int i;
108*4882a593Smuzhiyun int ret;
109*4882a593Smuzhiyun
110*4882a593Smuzhiyun if (!vc4->v3d) {
111*4882a593Smuzhiyun DRM_DEBUG("Creating perfmon no VC4 V3D probed\n");
112*4882a593Smuzhiyun return -ENODEV;
113*4882a593Smuzhiyun }
114*4882a593Smuzhiyun
115*4882a593Smuzhiyun /* Number of monitored counters cannot exceed HW limits. */
116*4882a593Smuzhiyun if (req->ncounters > DRM_VC4_MAX_PERF_COUNTERS ||
117*4882a593Smuzhiyun !req->ncounters)
118*4882a593Smuzhiyun return -EINVAL;
119*4882a593Smuzhiyun
120*4882a593Smuzhiyun /* Make sure all events are valid. */
121*4882a593Smuzhiyun for (i = 0; i < req->ncounters; i++) {
122*4882a593Smuzhiyun if (req->events[i] >= VC4_PERFCNT_NUM_EVENTS)
123*4882a593Smuzhiyun return -EINVAL;
124*4882a593Smuzhiyun }
125*4882a593Smuzhiyun
126*4882a593Smuzhiyun perfmon = kzalloc(struct_size(perfmon, counters, req->ncounters),
127*4882a593Smuzhiyun GFP_KERNEL);
128*4882a593Smuzhiyun if (!perfmon)
129*4882a593Smuzhiyun return -ENOMEM;
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun for (i = 0; i < req->ncounters; i++)
132*4882a593Smuzhiyun perfmon->events[i] = req->events[i];
133*4882a593Smuzhiyun
134*4882a593Smuzhiyun perfmon->ncounters = req->ncounters;
135*4882a593Smuzhiyun
136*4882a593Smuzhiyun refcount_set(&perfmon->refcnt, 1);
137*4882a593Smuzhiyun
138*4882a593Smuzhiyun mutex_lock(&vc4file->perfmon.lock);
139*4882a593Smuzhiyun ret = idr_alloc(&vc4file->perfmon.idr, perfmon, VC4_PERFMONID_MIN,
140*4882a593Smuzhiyun VC4_PERFMONID_MAX, GFP_KERNEL);
141*4882a593Smuzhiyun mutex_unlock(&vc4file->perfmon.lock);
142*4882a593Smuzhiyun
143*4882a593Smuzhiyun if (ret < 0) {
144*4882a593Smuzhiyun kfree(perfmon);
145*4882a593Smuzhiyun return ret;
146*4882a593Smuzhiyun }
147*4882a593Smuzhiyun
148*4882a593Smuzhiyun req->id = ret;
149*4882a593Smuzhiyun return 0;
150*4882a593Smuzhiyun }
151*4882a593Smuzhiyun
vc4_perfmon_destroy_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)152*4882a593Smuzhiyun int vc4_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
153*4882a593Smuzhiyun struct drm_file *file_priv)
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun struct vc4_dev *vc4 = to_vc4_dev(dev);
156*4882a593Smuzhiyun struct vc4_file *vc4file = file_priv->driver_priv;
157*4882a593Smuzhiyun struct drm_vc4_perfmon_destroy *req = data;
158*4882a593Smuzhiyun struct vc4_perfmon *perfmon;
159*4882a593Smuzhiyun
160*4882a593Smuzhiyun if (!vc4->v3d) {
161*4882a593Smuzhiyun DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n");
162*4882a593Smuzhiyun return -ENODEV;
163*4882a593Smuzhiyun }
164*4882a593Smuzhiyun
165*4882a593Smuzhiyun mutex_lock(&vc4file->perfmon.lock);
166*4882a593Smuzhiyun perfmon = idr_remove(&vc4file->perfmon.idr, req->id);
167*4882a593Smuzhiyun mutex_unlock(&vc4file->perfmon.lock);
168*4882a593Smuzhiyun
169*4882a593Smuzhiyun if (!perfmon)
170*4882a593Smuzhiyun return -EINVAL;
171*4882a593Smuzhiyun
172*4882a593Smuzhiyun vc4_perfmon_put(perfmon);
173*4882a593Smuzhiyun return 0;
174*4882a593Smuzhiyun }
175*4882a593Smuzhiyun
vc4_perfmon_get_values_ioctl(struct drm_device * dev,void * data,struct drm_file * file_priv)176*4882a593Smuzhiyun int vc4_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
177*4882a593Smuzhiyun struct drm_file *file_priv)
178*4882a593Smuzhiyun {
179*4882a593Smuzhiyun struct vc4_dev *vc4 = to_vc4_dev(dev);
180*4882a593Smuzhiyun struct vc4_file *vc4file = file_priv->driver_priv;
181*4882a593Smuzhiyun struct drm_vc4_perfmon_get_values *req = data;
182*4882a593Smuzhiyun struct vc4_perfmon *perfmon;
183*4882a593Smuzhiyun int ret;
184*4882a593Smuzhiyun
185*4882a593Smuzhiyun if (!vc4->v3d) {
186*4882a593Smuzhiyun DRM_DEBUG("Getting perfmon no VC4 V3D probed\n");
187*4882a593Smuzhiyun return -ENODEV;
188*4882a593Smuzhiyun }
189*4882a593Smuzhiyun
190*4882a593Smuzhiyun mutex_lock(&vc4file->perfmon.lock);
191*4882a593Smuzhiyun perfmon = idr_find(&vc4file->perfmon.idr, req->id);
192*4882a593Smuzhiyun vc4_perfmon_get(perfmon);
193*4882a593Smuzhiyun mutex_unlock(&vc4file->perfmon.lock);
194*4882a593Smuzhiyun
195*4882a593Smuzhiyun if (!perfmon)
196*4882a593Smuzhiyun return -EINVAL;
197*4882a593Smuzhiyun
198*4882a593Smuzhiyun if (copy_to_user(u64_to_user_ptr(req->values_ptr), perfmon->counters,
199*4882a593Smuzhiyun perfmon->ncounters * sizeof(u64)))
200*4882a593Smuzhiyun ret = -EFAULT;
201*4882a593Smuzhiyun else
202*4882a593Smuzhiyun ret = 0;
203*4882a593Smuzhiyun
204*4882a593Smuzhiyun vc4_perfmon_put(perfmon);
205*4882a593Smuzhiyun return ret;
206*4882a593Smuzhiyun }
207