/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS	32
#else
#define NUM_VIRT_COUNTERS	0
#endif

#define OP_EVENT_MASK			0x0FFF
#define OP_CTR_OVERFLOW			(1ULL<<31)

#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL<<32)|(1ULL<<21))
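/*
 * Informational note on the constant above: in the upper dword of the
 * mask, the only bits left writable (i.e. not reserved) are [35:32]
 * and [41:40], which in the AMD EvntSel layout hold the extended
 * event-select bits and the Host/Guest-only filter bits; bit 21 in
 * the low dword is architecturally reserved. (Derived from the
 * constant itself plus the EvntSel register layout.)
 */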

static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

#define IBS_FETCH_SIZE			6
#define IBS_OP_SIZE			12
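/*
 * Informational note: these sizes count the data words of an IBS
 * sample as seen by the oprofile buffer code; each 64-bit value
 * added with oprofile_add_data64() accounts for two words. The fetch
 * sample below records three 64-bit values (6 words), the op sample
 * six (12 words), and an optional branch target adds one more word
 * via oprofile_add_data().
 */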

static u32 ibs_caps;

struct ibs_config {
	unsigned long op_enabled;
	unsigned long fetch_enabled;
	unsigned long max_cnt_fetch;
	unsigned long max_cnt_op;
	unsigned long rand_en;
	unsigned long dispatched_ops;
	unsigned long branch_target;
};

struct ibs_state {
	u64		ibs_op_ctl;
	int		branch_target;
	unsigned long	sample_size;
};

static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
#define IBS_RANDOM_BITS			12
#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
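/*
 * Informational note: the IbsOpMaxCnt field programmed in
 * op_amd_start_ibs() is in units of 16 ops (the max count is stored
 * right-shifted by 4), so IBS_RANDOM_MAXCNT_OFFSET = 1 << 7 = 128
 * field units corresponds to 128 * 16 = 2048 ops, i.e. half of the
 * 4096-op span covered by the IBS_RANDOM_BITS randomized bits of
 * IbsOpCurCnt.
 */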

/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 *                       16   14   13    11
 * Feedback polynomial = X  + X  + X  +  X  + 1
 */
static unsigned int lfsr_random(void)
{
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	/* Compute next bit to shift in */
	bit = ((lfsr_value >> 0) ^
	       (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^
	       (lfsr_value >> 5)) & 0x0001;

	/* Advance to next register value */
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}
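/*
 * Informational note: for this right-shifting (Fibonacci) LFSR each
 * polynomial term x^k corresponds to a tap at bit 16 - k, so x^16,
 * x^14, x^13 and x^11 map to the bits 0, 2, 3 and 5 sampled above.
 * Any nonzero seed such as 0xF00D works; only the all-zero state
 * would lock up, and with this standard maximal-length polynomial
 * the sequence repeats after 2^16 - 1 steps.
 */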

/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20-bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12-bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/*
		 * Workaround when the hardware cannot write to
		 * IbsOpCurCnt:
		 *
		 * Randomize the lower 8 bits of the 16-bit
		 * IbsOpMaxCnt [15:0] value in the range of -128 to
		 * +127 by adding/subtracting an offset to/from the
		 * maximum count (IbsOpMaxCnt).
		 *
		 * To avoid overflows and underflows and to protect
		 * the upper bits starting at bit 16, the initial
		 * value for IbsOpMaxCnt must fit in the range from
		 * 0x0081 to 0xff80.
		 */
		val += (s8)(random >> 4);
	else
		val |= (u64)(random & IBS_RANDOM_MASK) << 32;

	return val;
}
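/*
 * Worked example with a hypothetical random value of 0x0a5f: on the
 * workaround path, (s8)(0x0a5f >> 4) = (s8)0xa5 = -91, so the max
 * count field drops by 91, shortening the period by 91 * 16 ops. On
 * hardware with IBS_CAPS_RDWROPCNT the same value instead seeds the
 * low 12 bits of IbsOpCurCnt (bits [43:32] of the control value)
 * with 0xa5f.
 */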

static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}

static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: Since the max count settings may be out of range, we
	 * write back the actually used values so that userland can
	 * read them.
	 */

	if (ibs_config.fetch_enabled) {
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported.  See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset, so we compensate with half
			 * of the randomized range. Also avoid
			 * underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
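		/*
		 * Informational note: IBS_OP_MAX_CNT masks the low 16
		 * bits of the count. With IBS_CAPS_OPCNTEXT the extra
		 * high bits of the count are presumably kept in
		 * IbsOpCtl[26:20]; the shift by 4 on the next line
		 * moves them there, and without the extension those
		 * bits are zero so the line is a no-op.
		 */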
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}

static void op_amd_stop_ibs(void)
{
	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled)
		/* clear max count and the enable bit */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

	if (ibs_config.op_enabled)
		/* clear max count and the enable bit */
		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
			       struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

#endif
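/*
 * Informational note: op_x86_phys_to_virt() returns the virtual
 * counter currently scheduled onto physical counter i, so switching
 * only needs to reprogram the event-select registers here; saving
 * and restoring the counter values themselves is presumably handled
 * by the generic multiplexing code that invokes this hook.
 */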

/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}

static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
		if (num_counters == AMD64_NUM_COUNTERS_CORE) {
			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
		} else {
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		}
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		op_amd_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}
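/*
 * Informational note: on Family 15h the core performance counters
 * are exposed as interleaved control/counter MSR pairs starting at
 * MSR_F15H_PERF_CTL and MSR_F15H_PERF_CTR, hence the stride of two
 * (i << 1) above; the legacy MSR_K7_* addresses are still used for
 * the NMI-watchdog reservation in both cases.
 */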

static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid false detection of counter overflows in the
		 * NMI handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;

		/* setup counter registers */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}
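/*
 * Informational note: the counters count upwards and raise an NMI on
 * overflow, so arming one with -(u64)reset_value makes it overflow
 * after exactly reset_value events. The -1LL written while clearing
 * keeps bit 31 set (see OP_CTR_OVERFLOW) so a stale counter value is
 * not mistaken for an overflow by op_amd_check_ctrs().
 */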

static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c */
	return 1;
}
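/*
 * Informational note: OP_CTR_OVERFLOW tests bit 31. A counter armed
 * with -(u64)reset_value starts out with bit 31 set (for any
 * practical reset_value below 2^31), and the bit only clears once
 * the counter has wrapped past zero, so a clear bit is the overflow
 * condition checked above.
 */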

static void op_amd_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_start_ibs();
}

static void op_amd_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * Subtle: stop on all counters to avoid a race with setting
	 * our pm callback
	 */
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_stop_ibs();
}

/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */

static void init_ibs(void)
{
	ibs_caps = get_ibs_caps();

	if (!ibs_caps)
		return;

	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}
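/*
 * Informational note: get_ibs_caps() lives in the shared AMD IBS
 * support code; it returns the CPUID IBS capability flags, or 0 when
 * IBS is unavailable or the APIC extended-interrupt LVT entry
 * mentioned above could not be reserved. A zero ibs_caps therefore
 * disables every IBS path in this file.
 */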

static int (*create_arch_files)(struct dentry *root);

static int setup_ibs_files(struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(root);

	if (ret)
		return ret;

	if (!ibs_caps)
		return ret;

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(root, "ibs_fetch");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(root, "ibs_op");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_op);
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}
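/*
 * Informational note: the control files created above surface to
 * userspace under the oprofilefs mount point, conventionally
 * /dev/oprofile. A hypothetical IBS op session could therefore be
 * configured with:
 *
 *	echo 250000 > /dev/oprofile/ibs_op/max_count
 *	echo 1 > /dev/oprofile/ibs_op/enable
 *
 * before profiling is started (paths shown for illustration only;
 * the actual location depends on where oprofilefs is mounted).
 */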

struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
	init_ibs();
	create_arch_files = ops->create_files;
	ops->create_files = setup_ibs_files;

	if (boot_cpu_data.x86 == 0x15)
		num_counters = AMD64_NUM_COUNTERS_CORE;
	else
		num_counters = AMD64_NUM_COUNTERS;

	op_amd_spec.num_counters = num_counters;
	op_amd_spec.num_controls = num_counters;
	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

	return 0;
}
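/*
 * Informational note: AMD64_NUM_COUNTERS is 4 (the legacy
 * K7/K8/Family 10h counter set) and AMD64_NUM_COUNTERS_CORE is 6
 * (the Family 15h core counters). Without
 * CONFIG_OPROFILE_EVENT_MULTIPLEX, NUM_VIRT_COUNTERS is 0, so the
 * max() above degenerates to num_counters and virtual counters map
 * 1:1 onto physical ones.
 */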

struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};
543