xref: /OK3568_Linux_fs/kernel/arch/x86/events/rapl.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-only
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Support Intel/AMD RAPL energy consumption counters
4*4882a593Smuzhiyun  * Copyright (C) 2013 Google, Inc., Stephane Eranian
5*4882a593Smuzhiyun  *
6*4882a593Smuzhiyun  * Intel RAPL interface is specified in the IA-32 Manual Vol3b
7*4882a593Smuzhiyun  * section 14.7.1 (September 2013)
8*4882a593Smuzhiyun  *
9*4882a593Smuzhiyun  * AMD RAPL interface for Fam17h is described in the public PPR:
10*4882a593Smuzhiyun  * https://bugzilla.kernel.org/show_bug.cgi?id=206537
11*4882a593Smuzhiyun  *
12*4882a593Smuzhiyun  * RAPL provides more controls than just reporting energy consumption
13*4882a593Smuzhiyun  * however here we only expose the energy consumption free running
14*4882a593Smuzhiyun  * counters (pp0, pkg, dram, gpu, psys).
15*4882a593Smuzhiyun  *
16*4882a593Smuzhiyun  * Each of those counters increments in a power unit defined by the
17*4882a593Smuzhiyun  * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules
18*4882a593Smuzhiyun  * but it can vary.
19*4882a593Smuzhiyun  *
20*4882a593Smuzhiyun  * Counter to rapl events mappings:
21*4882a593Smuzhiyun  *
22*4882a593Smuzhiyun  *  pp0 counter: consumption of all physical cores (power plane 0)
23*4882a593Smuzhiyun  * 	  event: rapl_energy_cores
24*4882a593Smuzhiyun  *    perf code: 0x1
25*4882a593Smuzhiyun  *
26*4882a593Smuzhiyun  *  pkg counter: consumption of the whole processor package
27*4882a593Smuzhiyun  *	  event: rapl_energy_pkg
28*4882a593Smuzhiyun  *    perf code: 0x2
29*4882a593Smuzhiyun  *
30*4882a593Smuzhiyun  * dram counter: consumption of the dram domain (servers only)
31*4882a593Smuzhiyun  *	  event: rapl_energy_dram
32*4882a593Smuzhiyun  *    perf code: 0x3
33*4882a593Smuzhiyun  *
34*4882a593Smuzhiyun  * gpu counter: consumption of the builtin-gpu domain (client only)
35*4882a593Smuzhiyun  *	  event: rapl_energy_gpu
36*4882a593Smuzhiyun  *    perf code: 0x4
37*4882a593Smuzhiyun  *
38*4882a593Smuzhiyun  *  psys counter: consumption of the builtin-psys domain (client only)
39*4882a593Smuzhiyun  *	  event: rapl_energy_psys
40*4882a593Smuzhiyun  *    perf code: 0x5
41*4882a593Smuzhiyun  *
42*4882a593Smuzhiyun  * We manage those counters as free running (read-only). They may be
43*4882a593Smuzhiyun  * used simultaneously by other tools, such as turbostat.
44*4882a593Smuzhiyun  *
45*4882a593Smuzhiyun  * The events only support system-wide mode counting. There is no
46*4882a593Smuzhiyun  * sampling support because it does not make sense and is not
47*4882a593Smuzhiyun  * supported by the RAPL hardware.
48*4882a593Smuzhiyun  *
49*4882a593Smuzhiyun  * Because we want to avoid floating-point operations in the kernel,
50*4882a593Smuzhiyun  * the events are all reported in fixed point arithmetic (32.32).
51*4882a593Smuzhiyun  * Tools must adjust the counts to convert them to Watts using
52*4882a593Smuzhiyun  * the duration of the measurement. Tools may use a function such as
53*4882a593Smuzhiyun  * ldexp(raw_count, -32);
54*4882a593Smuzhiyun  */
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun #define pr_fmt(fmt) "RAPL PMU: " fmt
57*4882a593Smuzhiyun 
58*4882a593Smuzhiyun #include <linux/module.h>
59*4882a593Smuzhiyun #include <linux/slab.h>
60*4882a593Smuzhiyun #include <linux/perf_event.h>
61*4882a593Smuzhiyun #include <linux/nospec.h>
62*4882a593Smuzhiyun #include <asm/cpu_device_id.h>
63*4882a593Smuzhiyun #include <asm/intel-family.h>
64*4882a593Smuzhiyun #include "perf_event.h"
65*4882a593Smuzhiyun #include "probe.h"
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun MODULE_LICENSE("GPL");
68*4882a593Smuzhiyun 
/*
 * RAPL energy status counters, one enumerator per power domain.
 *
 * The enumerator value is the zero-based domain index; the perf event
 * code passed in attr->config is that index plus one.
 */
enum perf_rapl_events {
	PERF_RAPL_PP0	= 0,	/* power plane 0: all cores */
	PERF_RAPL_PKG	= 1,	/* entire package */
	PERF_RAPL_RAM	= 2,	/* DRAM */
	PERF_RAPL_PP1	= 3,	/* power plane 1: gpu */
	PERF_RAPL_PSYS	= 4,	/* psys */

	PERF_RAPL_MAX,			/* number of domains above */
	NR_RAPL_DOMAINS = PERF_RAPL_MAX,
};
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun static const char *const rapl_domain_names[NR_RAPL_DOMAINS] __initconst = {
84*4882a593Smuzhiyun 	"pp0-core",
85*4882a593Smuzhiyun 	"package",
86*4882a593Smuzhiyun 	"dram",
87*4882a593Smuzhiyun 	"pp1-gpu",
88*4882a593Smuzhiyun 	"psys",
89*4882a593Smuzhiyun };
90*4882a593Smuzhiyun 
/*
 * The event code lives in the low 8 bits of attr->config;
 * every other config bit is reserved.
 */
#define RAPL_EVENT_MASK		0xFFULL
/* Number of counter bits used when computing deltas in rapl_event_update(). */
#define RAPL_CNTR_WIDTH		32
97*4882a593Smuzhiyun 
/*
 * Define a read-only (0444) sysfs event attribute named @_name whose
 * content is the fixed string @str. The resulting object is called
 * event_attr_<v>.
 */
#define RAPL_EVENT_ATTR_STR(_name, v, str)					\
static struct perf_pmu_events_attr event_attr_##v = {				\
	.attr		= __ATTR(_name, 0444, perf_event_sysfs_show, NULL),	\
	.id		= 0,							\
	.event_str	= str,							\
};
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun struct rapl_pmu {
106*4882a593Smuzhiyun 	raw_spinlock_t		lock;
107*4882a593Smuzhiyun 	int			n_active;
108*4882a593Smuzhiyun 	int			cpu;
109*4882a593Smuzhiyun 	struct list_head	active_list;
110*4882a593Smuzhiyun 	struct pmu		*pmu;
111*4882a593Smuzhiyun 	ktime_t			timer_interval;
112*4882a593Smuzhiyun 	struct hrtimer		hrtimer;
113*4882a593Smuzhiyun };
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun struct rapl_pmus {
116*4882a593Smuzhiyun 	struct pmu		pmu;
117*4882a593Smuzhiyun 	unsigned int		maxdie;
118*4882a593Smuzhiyun 	struct rapl_pmu		*pmus[];
119*4882a593Smuzhiyun };
120*4882a593Smuzhiyun 
/* Per-model fixups applied to rapl_hw_unit[] by rapl_check_hw_unit(). */
enum rapl_unit_quirk {
	RAPL_UNIT_QUIRK_NONE,		/* use the power unit MSR as is */
	RAPL_UNIT_QUIRK_INTEL_HSW,	/* fixed DRAM energy unit */
	RAPL_UNIT_QUIRK_INTEL_SPR,	/* fixed DRAM and Psys energy units */
};
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun struct rapl_model {
128*4882a593Smuzhiyun 	struct perf_msr *rapl_msrs;
129*4882a593Smuzhiyun 	unsigned long	events;
130*4882a593Smuzhiyun 	unsigned int	msr_power_unit;
131*4882a593Smuzhiyun 	enum rapl_unit_quirk	unit_quirk;
132*4882a593Smuzhiyun };
133*4882a593Smuzhiyun 
134*4882a593Smuzhiyun  /* 1/2^hw_unit Joule */
135*4882a593Smuzhiyun static int rapl_hw_unit[NR_RAPL_DOMAINS] __read_mostly;
136*4882a593Smuzhiyun static struct rapl_pmus *rapl_pmus;
137*4882a593Smuzhiyun static cpumask_t rapl_cpu_mask;
138*4882a593Smuzhiyun static unsigned int rapl_cntr_mask;
139*4882a593Smuzhiyun static u64 rapl_timer_ms;
140*4882a593Smuzhiyun static struct perf_msr *rapl_msrs;
141*4882a593Smuzhiyun 
cpu_to_rapl_pmu(unsigned int cpu)142*4882a593Smuzhiyun static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu)
143*4882a593Smuzhiyun {
144*4882a593Smuzhiyun 	unsigned int dieid = topology_logical_die_id(cpu);
145*4882a593Smuzhiyun 
146*4882a593Smuzhiyun 	/*
147*4882a593Smuzhiyun 	 * The unsigned check also catches the '-1' return value for non
148*4882a593Smuzhiyun 	 * existent mappings in the topology map.
149*4882a593Smuzhiyun 	 */
150*4882a593Smuzhiyun 	return dieid < rapl_pmus->maxdie ? rapl_pmus->pmus[dieid] : NULL;
151*4882a593Smuzhiyun }
152*4882a593Smuzhiyun 
rapl_read_counter(struct perf_event * event)153*4882a593Smuzhiyun static inline u64 rapl_read_counter(struct perf_event *event)
154*4882a593Smuzhiyun {
155*4882a593Smuzhiyun 	u64 raw;
156*4882a593Smuzhiyun 	rdmsrl(event->hw.event_base, raw);
157*4882a593Smuzhiyun 	return raw;
158*4882a593Smuzhiyun }
159*4882a593Smuzhiyun 
/*
 * rapl_scale - convert a raw counter delta to the exported 2^-32 Joule unit
 * @v:   raw delta, in hardware units of 1/2^rapl_hw_unit[] Joules
 * @cfg: one-based event code of the domain (domain index + 1)
 *
 * Returns @v shifted so that one count equals 2^-32 Joules. When @cfg does
 * not name a valid domain a warning is logged and @v is returned unscaled.
 */
static inline u64 rapl_scale(u64 v, int cfg)
{
	/*
	 * cfg is one-based: anything outside [1, NR_RAPL_DOMAINS] would
	 * index rapl_hw_unit[] out of bounds below.
	 */
	if (cfg < 1 || cfg > NR_RAPL_DOMAINS) {
		pr_warn("Invalid domain %d, failed to scale data\n", cfg);
		return v;
	}
	/*
	 * Scale the delta to the smallest unit (1/2^32 Joules).
	 * Users scale back to Joules with ldexp(count, -32).
	 * Watts = Joules / time delta
	 */
	return v << (32 - rapl_hw_unit[cfg - 1]);
}
174*4882a593Smuzhiyun 
rapl_event_update(struct perf_event * event)175*4882a593Smuzhiyun static u64 rapl_event_update(struct perf_event *event)
176*4882a593Smuzhiyun {
177*4882a593Smuzhiyun 	struct hw_perf_event *hwc = &event->hw;
178*4882a593Smuzhiyun 	u64 prev_raw_count, new_raw_count;
179*4882a593Smuzhiyun 	s64 delta, sdelta;
180*4882a593Smuzhiyun 	int shift = RAPL_CNTR_WIDTH;
181*4882a593Smuzhiyun 
182*4882a593Smuzhiyun again:
183*4882a593Smuzhiyun 	prev_raw_count = local64_read(&hwc->prev_count);
184*4882a593Smuzhiyun 	rdmsrl(event->hw.event_base, new_raw_count);
185*4882a593Smuzhiyun 
186*4882a593Smuzhiyun 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
187*4882a593Smuzhiyun 			    new_raw_count) != prev_raw_count) {
188*4882a593Smuzhiyun 		cpu_relax();
189*4882a593Smuzhiyun 		goto again;
190*4882a593Smuzhiyun 	}
191*4882a593Smuzhiyun 
192*4882a593Smuzhiyun 	/*
193*4882a593Smuzhiyun 	 * Now we have the new raw value and have updated the prev
194*4882a593Smuzhiyun 	 * timestamp already. We can now calculate the elapsed delta
195*4882a593Smuzhiyun 	 * (event-)time and add that to the generic event.
196*4882a593Smuzhiyun 	 *
197*4882a593Smuzhiyun 	 * Careful, not all hw sign-extends above the physical width
198*4882a593Smuzhiyun 	 * of the count.
199*4882a593Smuzhiyun 	 */
200*4882a593Smuzhiyun 	delta = (new_raw_count << shift) - (prev_raw_count << shift);
201*4882a593Smuzhiyun 	delta >>= shift;
202*4882a593Smuzhiyun 
203*4882a593Smuzhiyun 	sdelta = rapl_scale(delta, event->hw.config);
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun 	local64_add(sdelta, &event->count);
206*4882a593Smuzhiyun 
207*4882a593Smuzhiyun 	return new_raw_count;
208*4882a593Smuzhiyun }
209*4882a593Smuzhiyun 
rapl_start_hrtimer(struct rapl_pmu * pmu)210*4882a593Smuzhiyun static void rapl_start_hrtimer(struct rapl_pmu *pmu)
211*4882a593Smuzhiyun {
212*4882a593Smuzhiyun        hrtimer_start(&pmu->hrtimer, pmu->timer_interval,
213*4882a593Smuzhiyun 		     HRTIMER_MODE_REL_PINNED);
214*4882a593Smuzhiyun }
215*4882a593Smuzhiyun 
rapl_hrtimer_handle(struct hrtimer * hrtimer)216*4882a593Smuzhiyun static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
217*4882a593Smuzhiyun {
218*4882a593Smuzhiyun 	struct rapl_pmu *pmu = container_of(hrtimer, struct rapl_pmu, hrtimer);
219*4882a593Smuzhiyun 	struct perf_event *event;
220*4882a593Smuzhiyun 	unsigned long flags;
221*4882a593Smuzhiyun 
222*4882a593Smuzhiyun 	if (!pmu->n_active)
223*4882a593Smuzhiyun 		return HRTIMER_NORESTART;
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	raw_spin_lock_irqsave(&pmu->lock, flags);
226*4882a593Smuzhiyun 
227*4882a593Smuzhiyun 	list_for_each_entry(event, &pmu->active_list, active_entry)
228*4882a593Smuzhiyun 		rapl_event_update(event);
229*4882a593Smuzhiyun 
230*4882a593Smuzhiyun 	raw_spin_unlock_irqrestore(&pmu->lock, flags);
231*4882a593Smuzhiyun 
232*4882a593Smuzhiyun 	hrtimer_forward_now(hrtimer, pmu->timer_interval);
233*4882a593Smuzhiyun 
234*4882a593Smuzhiyun 	return HRTIMER_RESTART;
235*4882a593Smuzhiyun }
236*4882a593Smuzhiyun 
rapl_hrtimer_init(struct rapl_pmu * pmu)237*4882a593Smuzhiyun static void rapl_hrtimer_init(struct rapl_pmu *pmu)
238*4882a593Smuzhiyun {
239*4882a593Smuzhiyun 	struct hrtimer *hr = &pmu->hrtimer;
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
242*4882a593Smuzhiyun 	hr->function = rapl_hrtimer_handle;
243*4882a593Smuzhiyun }
244*4882a593Smuzhiyun 
__rapl_pmu_event_start(struct rapl_pmu * pmu,struct perf_event * event)245*4882a593Smuzhiyun static void __rapl_pmu_event_start(struct rapl_pmu *pmu,
246*4882a593Smuzhiyun 				   struct perf_event *event)
247*4882a593Smuzhiyun {
248*4882a593Smuzhiyun 	if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
249*4882a593Smuzhiyun 		return;
250*4882a593Smuzhiyun 
251*4882a593Smuzhiyun 	event->hw.state = 0;
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 	list_add_tail(&event->active_entry, &pmu->active_list);
254*4882a593Smuzhiyun 
255*4882a593Smuzhiyun 	local64_set(&event->hw.prev_count, rapl_read_counter(event));
256*4882a593Smuzhiyun 
257*4882a593Smuzhiyun 	pmu->n_active++;
258*4882a593Smuzhiyun 	if (pmu->n_active == 1)
259*4882a593Smuzhiyun 		rapl_start_hrtimer(pmu);
260*4882a593Smuzhiyun }
261*4882a593Smuzhiyun 
rapl_pmu_event_start(struct perf_event * event,int mode)262*4882a593Smuzhiyun static void rapl_pmu_event_start(struct perf_event *event, int mode)
263*4882a593Smuzhiyun {
264*4882a593Smuzhiyun 	struct rapl_pmu *pmu = event->pmu_private;
265*4882a593Smuzhiyun 	unsigned long flags;
266*4882a593Smuzhiyun 
267*4882a593Smuzhiyun 	raw_spin_lock_irqsave(&pmu->lock, flags);
268*4882a593Smuzhiyun 	__rapl_pmu_event_start(pmu, event);
269*4882a593Smuzhiyun 	raw_spin_unlock_irqrestore(&pmu->lock, flags);
270*4882a593Smuzhiyun }
271*4882a593Smuzhiyun 
rapl_pmu_event_stop(struct perf_event * event,int mode)272*4882a593Smuzhiyun static void rapl_pmu_event_stop(struct perf_event *event, int mode)
273*4882a593Smuzhiyun {
274*4882a593Smuzhiyun 	struct rapl_pmu *pmu = event->pmu_private;
275*4882a593Smuzhiyun 	struct hw_perf_event *hwc = &event->hw;
276*4882a593Smuzhiyun 	unsigned long flags;
277*4882a593Smuzhiyun 
278*4882a593Smuzhiyun 	raw_spin_lock_irqsave(&pmu->lock, flags);
279*4882a593Smuzhiyun 
280*4882a593Smuzhiyun 	/* mark event as deactivated and stopped */
281*4882a593Smuzhiyun 	if (!(hwc->state & PERF_HES_STOPPED)) {
282*4882a593Smuzhiyun 		WARN_ON_ONCE(pmu->n_active <= 0);
283*4882a593Smuzhiyun 		pmu->n_active--;
284*4882a593Smuzhiyun 		if (pmu->n_active == 0)
285*4882a593Smuzhiyun 			hrtimer_cancel(&pmu->hrtimer);
286*4882a593Smuzhiyun 
287*4882a593Smuzhiyun 		list_del(&event->active_entry);
288*4882a593Smuzhiyun 
289*4882a593Smuzhiyun 		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
290*4882a593Smuzhiyun 		hwc->state |= PERF_HES_STOPPED;
291*4882a593Smuzhiyun 	}
292*4882a593Smuzhiyun 
293*4882a593Smuzhiyun 	/* check if update of sw counter is necessary */
294*4882a593Smuzhiyun 	if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
295*4882a593Smuzhiyun 		/*
296*4882a593Smuzhiyun 		 * Drain the remaining delta count out of a event
297*4882a593Smuzhiyun 		 * that we are disabling:
298*4882a593Smuzhiyun 		 */
299*4882a593Smuzhiyun 		rapl_event_update(event);
300*4882a593Smuzhiyun 		hwc->state |= PERF_HES_UPTODATE;
301*4882a593Smuzhiyun 	}
302*4882a593Smuzhiyun 
303*4882a593Smuzhiyun 	raw_spin_unlock_irqrestore(&pmu->lock, flags);
304*4882a593Smuzhiyun }
305*4882a593Smuzhiyun 
rapl_pmu_event_add(struct perf_event * event,int mode)306*4882a593Smuzhiyun static int rapl_pmu_event_add(struct perf_event *event, int mode)
307*4882a593Smuzhiyun {
308*4882a593Smuzhiyun 	struct rapl_pmu *pmu = event->pmu_private;
309*4882a593Smuzhiyun 	struct hw_perf_event *hwc = &event->hw;
310*4882a593Smuzhiyun 	unsigned long flags;
311*4882a593Smuzhiyun 
312*4882a593Smuzhiyun 	raw_spin_lock_irqsave(&pmu->lock, flags);
313*4882a593Smuzhiyun 
314*4882a593Smuzhiyun 	hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
315*4882a593Smuzhiyun 
316*4882a593Smuzhiyun 	if (mode & PERF_EF_START)
317*4882a593Smuzhiyun 		__rapl_pmu_event_start(pmu, event);
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 	raw_spin_unlock_irqrestore(&pmu->lock, flags);
320*4882a593Smuzhiyun 
321*4882a593Smuzhiyun 	return 0;
322*4882a593Smuzhiyun }
323*4882a593Smuzhiyun 
rapl_pmu_event_del(struct perf_event * event,int flags)324*4882a593Smuzhiyun static void rapl_pmu_event_del(struct perf_event *event, int flags)
325*4882a593Smuzhiyun {
326*4882a593Smuzhiyun 	rapl_pmu_event_stop(event, PERF_EF_UPDATE);
327*4882a593Smuzhiyun }
328*4882a593Smuzhiyun 
rapl_pmu_event_init(struct perf_event * event)329*4882a593Smuzhiyun static int rapl_pmu_event_init(struct perf_event *event)
330*4882a593Smuzhiyun {
331*4882a593Smuzhiyun 	u64 cfg = event->attr.config & RAPL_EVENT_MASK;
332*4882a593Smuzhiyun 	int bit, ret = 0;
333*4882a593Smuzhiyun 	struct rapl_pmu *pmu;
334*4882a593Smuzhiyun 
335*4882a593Smuzhiyun 	/* only look at RAPL events */
336*4882a593Smuzhiyun 	if (event->attr.type != rapl_pmus->pmu.type)
337*4882a593Smuzhiyun 		return -ENOENT;
338*4882a593Smuzhiyun 
339*4882a593Smuzhiyun 	/* check only supported bits are set */
340*4882a593Smuzhiyun 	if (event->attr.config & ~RAPL_EVENT_MASK)
341*4882a593Smuzhiyun 		return -EINVAL;
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 	if (event->cpu < 0)
344*4882a593Smuzhiyun 		return -EINVAL;
345*4882a593Smuzhiyun 
346*4882a593Smuzhiyun 	event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
347*4882a593Smuzhiyun 
348*4882a593Smuzhiyun 	if (!cfg || cfg >= NR_RAPL_DOMAINS + 1)
349*4882a593Smuzhiyun 		return -EINVAL;
350*4882a593Smuzhiyun 
351*4882a593Smuzhiyun 	cfg = array_index_nospec((long)cfg, NR_RAPL_DOMAINS + 1);
352*4882a593Smuzhiyun 	bit = cfg - 1;
353*4882a593Smuzhiyun 
354*4882a593Smuzhiyun 	/* check event supported */
355*4882a593Smuzhiyun 	if (!(rapl_cntr_mask & (1 << bit)))
356*4882a593Smuzhiyun 		return -EINVAL;
357*4882a593Smuzhiyun 
358*4882a593Smuzhiyun 	/* unsupported modes and filters */
359*4882a593Smuzhiyun 	if (event->attr.sample_period) /* no sampling */
360*4882a593Smuzhiyun 		return -EINVAL;
361*4882a593Smuzhiyun 
362*4882a593Smuzhiyun 	/* must be done before validate_group */
363*4882a593Smuzhiyun 	pmu = cpu_to_rapl_pmu(event->cpu);
364*4882a593Smuzhiyun 	if (!pmu)
365*4882a593Smuzhiyun 		return -EINVAL;
366*4882a593Smuzhiyun 	event->cpu = pmu->cpu;
367*4882a593Smuzhiyun 	event->pmu_private = pmu;
368*4882a593Smuzhiyun 	event->hw.event_base = rapl_msrs[bit].msr;
369*4882a593Smuzhiyun 	event->hw.config = cfg;
370*4882a593Smuzhiyun 	event->hw.idx = bit;
371*4882a593Smuzhiyun 
372*4882a593Smuzhiyun 	return ret;
373*4882a593Smuzhiyun }
374*4882a593Smuzhiyun 
/* Bring @event->count up to date; the raw value returned is not needed. */
static void rapl_pmu_event_read(struct perf_event *event)
{
	(void)rapl_event_update(event);
}
379*4882a593Smuzhiyun 
rapl_get_attr_cpumask(struct device * dev,struct device_attribute * attr,char * buf)380*4882a593Smuzhiyun static ssize_t rapl_get_attr_cpumask(struct device *dev,
381*4882a593Smuzhiyun 				struct device_attribute *attr, char *buf)
382*4882a593Smuzhiyun {
383*4882a593Smuzhiyun 	return cpumap_print_to_pagebuf(true, buf, &rapl_cpu_mask);
384*4882a593Smuzhiyun }
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL);
387*4882a593Smuzhiyun 
388*4882a593Smuzhiyun static struct attribute *rapl_pmu_attrs[] = {
389*4882a593Smuzhiyun 	&dev_attr_cpumask.attr,
390*4882a593Smuzhiyun 	NULL,
391*4882a593Smuzhiyun };
392*4882a593Smuzhiyun 
393*4882a593Smuzhiyun static struct attribute_group rapl_pmu_attr_group = {
394*4882a593Smuzhiyun 	.attrs = rapl_pmu_attrs,
395*4882a593Smuzhiyun };
396*4882a593Smuzhiyun 
397*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
398*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-pkg  ,   rapl_pkg, "event=0x02");
399*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-ram  ,   rapl_ram, "event=0x03");
400*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-gpu  ,   rapl_gpu, "event=0x04");
401*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-psys,   rapl_psys, "event=0x05");
402*4882a593Smuzhiyun 
403*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules");
404*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-pkg.unit  ,   rapl_pkg_unit, "Joules");
405*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-ram.unit  ,   rapl_ram_unit, "Joules");
406*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-gpu.unit  ,   rapl_gpu_unit, "Joules");
407*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-psys.unit,   rapl_psys_unit, "Joules");
408*4882a593Smuzhiyun 
409*4882a593Smuzhiyun /*
410*4882a593Smuzhiyun  * we compute in 0.23 nJ increments regardless of MSR
411*4882a593Smuzhiyun  */
412*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10");
413*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-pkg.scale,     rapl_pkg_scale, "2.3283064365386962890625e-10");
414*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-ram.scale,     rapl_ram_scale, "2.3283064365386962890625e-10");
415*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-gpu.scale,     rapl_gpu_scale, "2.3283064365386962890625e-10");
416*4882a593Smuzhiyun RAPL_EVENT_ATTR_STR(energy-psys.scale,   rapl_psys_scale, "2.3283064365386962890625e-10");
417*4882a593Smuzhiyun 
418*4882a593Smuzhiyun /*
419*4882a593Smuzhiyun  * There are no default events, but we need to create
420*4882a593Smuzhiyun  * "events" group (with empty attrs) before updating
421*4882a593Smuzhiyun  * it with detected events.
422*4882a593Smuzhiyun  */
423*4882a593Smuzhiyun static struct attribute *attrs_empty[] = {
424*4882a593Smuzhiyun 	NULL,
425*4882a593Smuzhiyun };
426*4882a593Smuzhiyun 
427*4882a593Smuzhiyun static struct attribute_group rapl_pmu_events_group = {
428*4882a593Smuzhiyun 	.name = "events",
429*4882a593Smuzhiyun 	.attrs = attrs_empty,
430*4882a593Smuzhiyun };
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun PMU_FORMAT_ATTR(event, "config:0-7");
433*4882a593Smuzhiyun static struct attribute *rapl_formats_attr[] = {
434*4882a593Smuzhiyun 	&format_attr_event.attr,
435*4882a593Smuzhiyun 	NULL,
436*4882a593Smuzhiyun };
437*4882a593Smuzhiyun 
438*4882a593Smuzhiyun static struct attribute_group rapl_pmu_format_group = {
439*4882a593Smuzhiyun 	.name = "format",
440*4882a593Smuzhiyun 	.attrs = rapl_formats_attr,
441*4882a593Smuzhiyun };
442*4882a593Smuzhiyun 
443*4882a593Smuzhiyun static const struct attribute_group *rapl_attr_groups[] = {
444*4882a593Smuzhiyun 	&rapl_pmu_attr_group,
445*4882a593Smuzhiyun 	&rapl_pmu_format_group,
446*4882a593Smuzhiyun 	&rapl_pmu_events_group,
447*4882a593Smuzhiyun 	NULL,
448*4882a593Smuzhiyun };
449*4882a593Smuzhiyun 
450*4882a593Smuzhiyun static struct attribute *rapl_events_cores[] = {
451*4882a593Smuzhiyun 	EVENT_PTR(rapl_cores),
452*4882a593Smuzhiyun 	EVENT_PTR(rapl_cores_unit),
453*4882a593Smuzhiyun 	EVENT_PTR(rapl_cores_scale),
454*4882a593Smuzhiyun 	NULL,
455*4882a593Smuzhiyun };
456*4882a593Smuzhiyun 
457*4882a593Smuzhiyun static umode_t
rapl_not_visible(struct kobject * kobj,struct attribute * attr,int i)458*4882a593Smuzhiyun rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i)
459*4882a593Smuzhiyun {
460*4882a593Smuzhiyun 	return 0;
461*4882a593Smuzhiyun }
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun static struct attribute_group rapl_events_cores_group = {
464*4882a593Smuzhiyun 	.name  = "events",
465*4882a593Smuzhiyun 	.attrs = rapl_events_cores,
466*4882a593Smuzhiyun 	.is_visible = rapl_not_visible,
467*4882a593Smuzhiyun };
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun static struct attribute *rapl_events_pkg[] = {
470*4882a593Smuzhiyun 	EVENT_PTR(rapl_pkg),
471*4882a593Smuzhiyun 	EVENT_PTR(rapl_pkg_unit),
472*4882a593Smuzhiyun 	EVENT_PTR(rapl_pkg_scale),
473*4882a593Smuzhiyun 	NULL,
474*4882a593Smuzhiyun };
475*4882a593Smuzhiyun 
476*4882a593Smuzhiyun static struct attribute_group rapl_events_pkg_group = {
477*4882a593Smuzhiyun 	.name  = "events",
478*4882a593Smuzhiyun 	.attrs = rapl_events_pkg,
479*4882a593Smuzhiyun 	.is_visible = rapl_not_visible,
480*4882a593Smuzhiyun };
481*4882a593Smuzhiyun 
482*4882a593Smuzhiyun static struct attribute *rapl_events_ram[] = {
483*4882a593Smuzhiyun 	EVENT_PTR(rapl_ram),
484*4882a593Smuzhiyun 	EVENT_PTR(rapl_ram_unit),
485*4882a593Smuzhiyun 	EVENT_PTR(rapl_ram_scale),
486*4882a593Smuzhiyun 	NULL,
487*4882a593Smuzhiyun };
488*4882a593Smuzhiyun 
489*4882a593Smuzhiyun static struct attribute_group rapl_events_ram_group = {
490*4882a593Smuzhiyun 	.name  = "events",
491*4882a593Smuzhiyun 	.attrs = rapl_events_ram,
492*4882a593Smuzhiyun 	.is_visible = rapl_not_visible,
493*4882a593Smuzhiyun };
494*4882a593Smuzhiyun 
495*4882a593Smuzhiyun static struct attribute *rapl_events_gpu[] = {
496*4882a593Smuzhiyun 	EVENT_PTR(rapl_gpu),
497*4882a593Smuzhiyun 	EVENT_PTR(rapl_gpu_unit),
498*4882a593Smuzhiyun 	EVENT_PTR(rapl_gpu_scale),
499*4882a593Smuzhiyun 	NULL,
500*4882a593Smuzhiyun };
501*4882a593Smuzhiyun 
502*4882a593Smuzhiyun static struct attribute_group rapl_events_gpu_group = {
503*4882a593Smuzhiyun 	.name  = "events",
504*4882a593Smuzhiyun 	.attrs = rapl_events_gpu,
505*4882a593Smuzhiyun 	.is_visible = rapl_not_visible,
506*4882a593Smuzhiyun };
507*4882a593Smuzhiyun 
508*4882a593Smuzhiyun static struct attribute *rapl_events_psys[] = {
509*4882a593Smuzhiyun 	EVENT_PTR(rapl_psys),
510*4882a593Smuzhiyun 	EVENT_PTR(rapl_psys_unit),
511*4882a593Smuzhiyun 	EVENT_PTR(rapl_psys_scale),
512*4882a593Smuzhiyun 	NULL,
513*4882a593Smuzhiyun };
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun static struct attribute_group rapl_events_psys_group = {
516*4882a593Smuzhiyun 	.name  = "events",
517*4882a593Smuzhiyun 	.attrs = rapl_events_psys,
518*4882a593Smuzhiyun 	.is_visible = rapl_not_visible,
519*4882a593Smuzhiyun };
520*4882a593Smuzhiyun 
test_msr(int idx,void * data)521*4882a593Smuzhiyun static bool test_msr(int idx, void *data)
522*4882a593Smuzhiyun {
523*4882a593Smuzhiyun 	return test_bit(idx, (unsigned long *) data);
524*4882a593Smuzhiyun }
525*4882a593Smuzhiyun 
526*4882a593Smuzhiyun static struct perf_msr intel_rapl_msrs[] = {
527*4882a593Smuzhiyun 	[PERF_RAPL_PP0]  = { MSR_PP0_ENERGY_STATUS,      &rapl_events_cores_group, test_msr },
528*4882a593Smuzhiyun 	[PERF_RAPL_PKG]  = { MSR_PKG_ENERGY_STATUS,      &rapl_events_pkg_group,   test_msr },
529*4882a593Smuzhiyun 	[PERF_RAPL_RAM]  = { MSR_DRAM_ENERGY_STATUS,     &rapl_events_ram_group,   test_msr },
530*4882a593Smuzhiyun 	[PERF_RAPL_PP1]  = { MSR_PP1_ENERGY_STATUS,      &rapl_events_gpu_group,   test_msr },
531*4882a593Smuzhiyun 	[PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group,  test_msr },
532*4882a593Smuzhiyun };
533*4882a593Smuzhiyun 
534*4882a593Smuzhiyun /*
535*4882a593Smuzhiyun  * Force to PERF_RAPL_MAX size due to:
536*4882a593Smuzhiyun  * - perf_msr_probe(PERF_RAPL_MAX)
537*4882a593Smuzhiyun  * - want to use same event codes across both architectures
538*4882a593Smuzhiyun  */
539*4882a593Smuzhiyun static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = {
540*4882a593Smuzhiyun 	[PERF_RAPL_PKG]  = { MSR_AMD_PKG_ENERGY_STATUS,  &rapl_events_pkg_group,   test_msr },
541*4882a593Smuzhiyun };
542*4882a593Smuzhiyun 
543*4882a593Smuzhiyun 
rapl_cpu_offline(unsigned int cpu)544*4882a593Smuzhiyun static int rapl_cpu_offline(unsigned int cpu)
545*4882a593Smuzhiyun {
546*4882a593Smuzhiyun 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
547*4882a593Smuzhiyun 	int target;
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	/* Check if exiting cpu is used for collecting rapl events */
550*4882a593Smuzhiyun 	if (!cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask))
551*4882a593Smuzhiyun 		return 0;
552*4882a593Smuzhiyun 
553*4882a593Smuzhiyun 	pmu->cpu = -1;
554*4882a593Smuzhiyun 	/* Find a new cpu to collect rapl events */
555*4882a593Smuzhiyun 	target = cpumask_any_but(topology_die_cpumask(cpu), cpu);
556*4882a593Smuzhiyun 
557*4882a593Smuzhiyun 	/* Migrate rapl events to the new target */
558*4882a593Smuzhiyun 	if (target < nr_cpu_ids) {
559*4882a593Smuzhiyun 		cpumask_set_cpu(target, &rapl_cpu_mask);
560*4882a593Smuzhiyun 		pmu->cpu = target;
561*4882a593Smuzhiyun 		perf_pmu_migrate_context(pmu->pmu, cpu, target);
562*4882a593Smuzhiyun 	}
563*4882a593Smuzhiyun 	return 0;
564*4882a593Smuzhiyun }
565*4882a593Smuzhiyun 
rapl_cpu_online(unsigned int cpu)566*4882a593Smuzhiyun static int rapl_cpu_online(unsigned int cpu)
567*4882a593Smuzhiyun {
568*4882a593Smuzhiyun 	struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu);
569*4882a593Smuzhiyun 	int target;
570*4882a593Smuzhiyun 
571*4882a593Smuzhiyun 	if (!pmu) {
572*4882a593Smuzhiyun 		pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu));
573*4882a593Smuzhiyun 		if (!pmu)
574*4882a593Smuzhiyun 			return -ENOMEM;
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun 		raw_spin_lock_init(&pmu->lock);
577*4882a593Smuzhiyun 		INIT_LIST_HEAD(&pmu->active_list);
578*4882a593Smuzhiyun 		pmu->pmu = &rapl_pmus->pmu;
579*4882a593Smuzhiyun 		pmu->timer_interval = ms_to_ktime(rapl_timer_ms);
580*4882a593Smuzhiyun 		rapl_hrtimer_init(pmu);
581*4882a593Smuzhiyun 
582*4882a593Smuzhiyun 		rapl_pmus->pmus[topology_logical_die_id(cpu)] = pmu;
583*4882a593Smuzhiyun 	}
584*4882a593Smuzhiyun 
585*4882a593Smuzhiyun 	/*
586*4882a593Smuzhiyun 	 * Check if there is an online cpu in the package which collects rapl
587*4882a593Smuzhiyun 	 * events already.
588*4882a593Smuzhiyun 	 */
589*4882a593Smuzhiyun 	target = cpumask_any_and(&rapl_cpu_mask, topology_die_cpumask(cpu));
590*4882a593Smuzhiyun 	if (target < nr_cpu_ids)
591*4882a593Smuzhiyun 		return 0;
592*4882a593Smuzhiyun 
593*4882a593Smuzhiyun 	cpumask_set_cpu(cpu, &rapl_cpu_mask);
594*4882a593Smuzhiyun 	pmu->cpu = cpu;
595*4882a593Smuzhiyun 	return 0;
596*4882a593Smuzhiyun }
597*4882a593Smuzhiyun 
rapl_check_hw_unit(struct rapl_model * rm)598*4882a593Smuzhiyun static int rapl_check_hw_unit(struct rapl_model *rm)
599*4882a593Smuzhiyun {
600*4882a593Smuzhiyun 	u64 msr_rapl_power_unit_bits;
601*4882a593Smuzhiyun 	int i;
602*4882a593Smuzhiyun 
603*4882a593Smuzhiyun 	/* protect rdmsrl() to handle virtualization */
604*4882a593Smuzhiyun 	if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits))
605*4882a593Smuzhiyun 		return -1;
606*4882a593Smuzhiyun 	for (i = 0; i < NR_RAPL_DOMAINS; i++)
607*4882a593Smuzhiyun 		rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL;
608*4882a593Smuzhiyun 
609*4882a593Smuzhiyun 	switch (rm->unit_quirk) {
610*4882a593Smuzhiyun 	/*
611*4882a593Smuzhiyun 	 * DRAM domain on HSW server and KNL has fixed energy unit which can be
612*4882a593Smuzhiyun 	 * different than the unit from power unit MSR. See
613*4882a593Smuzhiyun 	 * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2
614*4882a593Smuzhiyun 	 * of 2. Datasheet, September 2014, Reference Number: 330784-001 "
615*4882a593Smuzhiyun 	 */
616*4882a593Smuzhiyun 	case RAPL_UNIT_QUIRK_INTEL_HSW:
617*4882a593Smuzhiyun 		rapl_hw_unit[PERF_RAPL_RAM] = 16;
618*4882a593Smuzhiyun 		break;
619*4882a593Smuzhiyun 	/*
620*4882a593Smuzhiyun 	 * SPR shares the same DRAM domain energy unit as HSW, plus it
621*4882a593Smuzhiyun 	 * also has a fixed energy unit for Psys domain.
622*4882a593Smuzhiyun 	 */
623*4882a593Smuzhiyun 	case RAPL_UNIT_QUIRK_INTEL_SPR:
624*4882a593Smuzhiyun 		rapl_hw_unit[PERF_RAPL_RAM] = 16;
625*4882a593Smuzhiyun 		rapl_hw_unit[PERF_RAPL_PSYS] = 0;
626*4882a593Smuzhiyun 		break;
627*4882a593Smuzhiyun 	default:
628*4882a593Smuzhiyun 		break;
629*4882a593Smuzhiyun 	}
630*4882a593Smuzhiyun 
631*4882a593Smuzhiyun 
632*4882a593Smuzhiyun 	/*
633*4882a593Smuzhiyun 	 * Calculate the timer rate:
634*4882a593Smuzhiyun 	 * Use reference of 200W for scaling the timeout to avoid counter
635*4882a593Smuzhiyun 	 * overflows. 200W = 200 Joules/sec
636*4882a593Smuzhiyun 	 * Divide interval by 2 to avoid lockstep (2 * 100)
637*4882a593Smuzhiyun 	 * if hw unit is 32, then we use 2 ms 1/200/2
638*4882a593Smuzhiyun 	 */
639*4882a593Smuzhiyun 	rapl_timer_ms = 2;
640*4882a593Smuzhiyun 	if (rapl_hw_unit[0] < 32) {
641*4882a593Smuzhiyun 		rapl_timer_ms = (1000 / (2 * 100));
642*4882a593Smuzhiyun 		rapl_timer_ms *= (1ULL << (32 - rapl_hw_unit[0] - 1));
643*4882a593Smuzhiyun 	}
644*4882a593Smuzhiyun 	return 0;
645*4882a593Smuzhiyun }
646*4882a593Smuzhiyun 
rapl_advertise(void)647*4882a593Smuzhiyun static void __init rapl_advertise(void)
648*4882a593Smuzhiyun {
649*4882a593Smuzhiyun 	int i;
650*4882a593Smuzhiyun 
651*4882a593Smuzhiyun 	pr_info("API unit is 2^-32 Joules, %d fixed counters, %llu ms ovfl timer\n",
652*4882a593Smuzhiyun 		hweight32(rapl_cntr_mask), rapl_timer_ms);
653*4882a593Smuzhiyun 
654*4882a593Smuzhiyun 	for (i = 0; i < NR_RAPL_DOMAINS; i++) {
655*4882a593Smuzhiyun 		if (rapl_cntr_mask & (1 << i)) {
656*4882a593Smuzhiyun 			pr_info("hw unit of domain %s 2^-%d Joules\n",
657*4882a593Smuzhiyun 				rapl_domain_names[i], rapl_hw_unit[i]);
658*4882a593Smuzhiyun 		}
659*4882a593Smuzhiyun 	}
660*4882a593Smuzhiyun }
661*4882a593Smuzhiyun 
cleanup_rapl_pmus(void)662*4882a593Smuzhiyun static void cleanup_rapl_pmus(void)
663*4882a593Smuzhiyun {
664*4882a593Smuzhiyun 	int i;
665*4882a593Smuzhiyun 
666*4882a593Smuzhiyun 	for (i = 0; i < rapl_pmus->maxdie; i++)
667*4882a593Smuzhiyun 		kfree(rapl_pmus->pmus[i]);
668*4882a593Smuzhiyun 	kfree(rapl_pmus);
669*4882a593Smuzhiyun }
670*4882a593Smuzhiyun 
671*4882a593Smuzhiyun static const struct attribute_group *rapl_attr_update[] = {
672*4882a593Smuzhiyun 	&rapl_events_cores_group,
673*4882a593Smuzhiyun 	&rapl_events_pkg_group,
674*4882a593Smuzhiyun 	&rapl_events_ram_group,
675*4882a593Smuzhiyun 	&rapl_events_gpu_group,
676*4882a593Smuzhiyun 	&rapl_events_psys_group,
677*4882a593Smuzhiyun 	NULL,
678*4882a593Smuzhiyun };
679*4882a593Smuzhiyun 
init_rapl_pmus(void)680*4882a593Smuzhiyun static int __init init_rapl_pmus(void)
681*4882a593Smuzhiyun {
682*4882a593Smuzhiyun 	int maxdie = topology_max_packages() * topology_max_die_per_package();
683*4882a593Smuzhiyun 	size_t size;
684*4882a593Smuzhiyun 
685*4882a593Smuzhiyun 	size = sizeof(*rapl_pmus) + maxdie * sizeof(struct rapl_pmu *);
686*4882a593Smuzhiyun 	rapl_pmus = kzalloc(size, GFP_KERNEL);
687*4882a593Smuzhiyun 	if (!rapl_pmus)
688*4882a593Smuzhiyun 		return -ENOMEM;
689*4882a593Smuzhiyun 
690*4882a593Smuzhiyun 	rapl_pmus->maxdie		= maxdie;
691*4882a593Smuzhiyun 	rapl_pmus->pmu.attr_groups	= rapl_attr_groups;
692*4882a593Smuzhiyun 	rapl_pmus->pmu.attr_update	= rapl_attr_update;
693*4882a593Smuzhiyun 	rapl_pmus->pmu.task_ctx_nr	= perf_invalid_context;
694*4882a593Smuzhiyun 	rapl_pmus->pmu.event_init	= rapl_pmu_event_init;
695*4882a593Smuzhiyun 	rapl_pmus->pmu.add		= rapl_pmu_event_add;
696*4882a593Smuzhiyun 	rapl_pmus->pmu.del		= rapl_pmu_event_del;
697*4882a593Smuzhiyun 	rapl_pmus->pmu.start		= rapl_pmu_event_start;
698*4882a593Smuzhiyun 	rapl_pmus->pmu.stop		= rapl_pmu_event_stop;
699*4882a593Smuzhiyun 	rapl_pmus->pmu.read		= rapl_pmu_event_read;
700*4882a593Smuzhiyun 	rapl_pmus->pmu.module		= THIS_MODULE;
701*4882a593Smuzhiyun 	rapl_pmus->pmu.capabilities	= PERF_PMU_CAP_NO_EXCLUDE;
702*4882a593Smuzhiyun 	return 0;
703*4882a593Smuzhiyun }
704*4882a593Smuzhiyun 
705*4882a593Smuzhiyun static struct rapl_model model_snb = {
706*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
707*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
708*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PP1),
709*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
710*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
711*4882a593Smuzhiyun };
712*4882a593Smuzhiyun 
713*4882a593Smuzhiyun static struct rapl_model model_snbep = {
714*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
715*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
716*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM),
717*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
718*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
719*4882a593Smuzhiyun };
720*4882a593Smuzhiyun 
721*4882a593Smuzhiyun static struct rapl_model model_hsw = {
722*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
723*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
724*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM) |
725*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PP1),
726*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
727*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
728*4882a593Smuzhiyun };
729*4882a593Smuzhiyun 
730*4882a593Smuzhiyun static struct rapl_model model_hsx = {
731*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
732*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
733*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM),
734*4882a593Smuzhiyun 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
735*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
736*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
737*4882a593Smuzhiyun };
738*4882a593Smuzhiyun 
739*4882a593Smuzhiyun static struct rapl_model model_knl = {
740*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PKG) |
741*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM),
742*4882a593Smuzhiyun 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_HSW,
743*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
744*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
745*4882a593Smuzhiyun };
746*4882a593Smuzhiyun 
747*4882a593Smuzhiyun static struct rapl_model model_skl = {
748*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
749*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
750*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM) |
751*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PP1) |
752*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PSYS),
753*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
754*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
755*4882a593Smuzhiyun };
756*4882a593Smuzhiyun 
757*4882a593Smuzhiyun static struct rapl_model model_spr = {
758*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PP0) |
759*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PKG) |
760*4882a593Smuzhiyun 			  BIT(PERF_RAPL_RAM) |
761*4882a593Smuzhiyun 			  BIT(PERF_RAPL_PSYS),
762*4882a593Smuzhiyun 	.unit_quirk	= RAPL_UNIT_QUIRK_INTEL_SPR,
763*4882a593Smuzhiyun 	.msr_power_unit = MSR_RAPL_POWER_UNIT,
764*4882a593Smuzhiyun 	.rapl_msrs      = intel_rapl_msrs,
765*4882a593Smuzhiyun };
766*4882a593Smuzhiyun 
767*4882a593Smuzhiyun static struct rapl_model model_amd_fam17h = {
768*4882a593Smuzhiyun 	.events		= BIT(PERF_RAPL_PKG),
769*4882a593Smuzhiyun 	.msr_power_unit = MSR_AMD_RAPL_POWER_UNIT,
770*4882a593Smuzhiyun 	.rapl_msrs      = amd_rapl_msrs,
771*4882a593Smuzhiyun };
772*4882a593Smuzhiyun 
/*
 * CPU match table consumed by x86_match_cpu() in rapl_pmu_init(). The
 * driver_data of the matching entry is the struct rapl_model describing
 * which events, MSR table, power-unit MSR and unit quirk apply to that CPU.
 * The table is terminated by an empty entry.
 */
static const struct x86_cpu_id rapl_model_match[] __initconst = {
	/* Intel family 6 models */
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE,		&model_snb),
	X86_MATCH_INTEL_FAM6_MODEL(SANDYBRIDGE_X,	&model_snbep),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE,		&model_snb),
	X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X,		&model_snbep),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL,		&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_L,		&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(HASWELL_G,		&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL,		&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_G,		&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL,	&model_knl),
	X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM,	&model_knl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(CANNONLAKE_L,	&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT,	&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_D,	&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS,	&model_hsw),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,		&model_hsx),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE,		&model_skl),
	X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X,	&model_spr),
	/* AMD and Hygon families are matched by vendor/family and share one model */
	X86_MATCH_VENDOR_FAM(AMD,	0x17,		&model_amd_fam17h),
	X86_MATCH_VENDOR_FAM(HYGON,	0x18,		&model_amd_fam17h),
	X86_MATCH_VENDOR_FAM(AMD,	0x19,		&model_amd_fam17h),
	{},
};
MODULE_DEVICE_TABLE(x86cpu, rapl_model_match);
810*4882a593Smuzhiyun 
rapl_pmu_init(void)811*4882a593Smuzhiyun static int __init rapl_pmu_init(void)
812*4882a593Smuzhiyun {
813*4882a593Smuzhiyun 	const struct x86_cpu_id *id;
814*4882a593Smuzhiyun 	struct rapl_model *rm;
815*4882a593Smuzhiyun 	int ret;
816*4882a593Smuzhiyun 
817*4882a593Smuzhiyun 	id = x86_match_cpu(rapl_model_match);
818*4882a593Smuzhiyun 	if (!id)
819*4882a593Smuzhiyun 		return -ENODEV;
820*4882a593Smuzhiyun 
821*4882a593Smuzhiyun 	rm = (struct rapl_model *) id->driver_data;
822*4882a593Smuzhiyun 
823*4882a593Smuzhiyun 	rapl_msrs = rm->rapl_msrs;
824*4882a593Smuzhiyun 
825*4882a593Smuzhiyun 	rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX,
826*4882a593Smuzhiyun 					false, (void *) &rm->events);
827*4882a593Smuzhiyun 
828*4882a593Smuzhiyun 	ret = rapl_check_hw_unit(rm);
829*4882a593Smuzhiyun 	if (ret)
830*4882a593Smuzhiyun 		return ret;
831*4882a593Smuzhiyun 
832*4882a593Smuzhiyun 	ret = init_rapl_pmus();
833*4882a593Smuzhiyun 	if (ret)
834*4882a593Smuzhiyun 		return ret;
835*4882a593Smuzhiyun 
836*4882a593Smuzhiyun 	/*
837*4882a593Smuzhiyun 	 * Install callbacks. Core will call them for each online cpu.
838*4882a593Smuzhiyun 	 */
839*4882a593Smuzhiyun 	ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE,
840*4882a593Smuzhiyun 				"perf/x86/rapl:online",
841*4882a593Smuzhiyun 				rapl_cpu_online, rapl_cpu_offline);
842*4882a593Smuzhiyun 	if (ret)
843*4882a593Smuzhiyun 		goto out;
844*4882a593Smuzhiyun 
845*4882a593Smuzhiyun 	ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1);
846*4882a593Smuzhiyun 	if (ret)
847*4882a593Smuzhiyun 		goto out1;
848*4882a593Smuzhiyun 
849*4882a593Smuzhiyun 	rapl_advertise();
850*4882a593Smuzhiyun 	return 0;
851*4882a593Smuzhiyun 
852*4882a593Smuzhiyun out1:
853*4882a593Smuzhiyun 	cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE);
854*4882a593Smuzhiyun out:
855*4882a593Smuzhiyun 	pr_warn("Initialization failed (%d), disabled\n", ret);
856*4882a593Smuzhiyun 	cleanup_rapl_pmus();
857*4882a593Smuzhiyun 	return ret;
858*4882a593Smuzhiyun }
859*4882a593Smuzhiyun module_init(rapl_pmu_init);
860*4882a593Smuzhiyun 
intel_rapl_exit(void)861*4882a593Smuzhiyun static void __exit intel_rapl_exit(void)
862*4882a593Smuzhiyun {
863*4882a593Smuzhiyun 	cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE);
864*4882a593Smuzhiyun 	perf_pmu_unregister(&rapl_pmus->pmu);
865*4882a593Smuzhiyun 	cleanup_rapl_pmus();
866*4882a593Smuzhiyun }
867*4882a593Smuzhiyun module_exit(intel_rapl_exit);
868