/*
 * @file op_model_amd.c
 * athlon / K7 / K8 / Family 10h model-specific MSR operations
 *
 * @remark Copyright 2002-2009 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 * @author Graydon Hoare
 * @author Robert Richter <robert.richter@amd.com>
 * @author Barry Kasindorf <barry.kasindorf@amd.com>
 * @author Jason Yeh <jason.yeh@amd.com>
 * @author Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
 */

#include <linux/oprofile.h>
#include <linux/device.h>
#include <linux/pci.h>
#include <linux/percpu.h>

#include <asm/ptrace.h>
#include <asm/msr.h>
#include <asm/nmi.h>
#include <asm/apic.h>
#include <asm/processor.h>

#include "op_x86_model.h"
#include "op_counter.h"

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
#define NUM_VIRT_COUNTERS 32
#else
#define NUM_VIRT_COUNTERS 0
#endif

#define OP_EVENT_MASK			0x0FFF
#define OP_CTR_OVERFLOW			(1ULL<<31)
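/*
 * A sketch of the overflow convention, derived from op_amd_setup_ctrs()
 * and op_amd_check_ctrs() below: each active counter is started at
 * -(reset_value), so for the usual reset_value < 2^31 bit 31 starts out
 * set. After reset_value events the counter wraps past zero and bit 31
 * clears, which is exactly what the OP_CTR_OVERFLOW test detects.
 */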

#define MSR_AMD_EVENTSEL_RESERVED	((0xFFFFFCF0ULL<<32)|(1ULL<<21))

static int num_counters;
static unsigned long reset_value[OP_MAX_COUNTER];

#define IBS_FETCH_SIZE			6
#define IBS_OP_SIZE			12
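/*
 * The sizes above count sample-buffer data words as consumed by
 * op_amd_handle_ibs() below: a fetch sample carries three 64-bit MSR
 * values and an op sample six, with oprofile_add_data64() storing each
 * 64-bit value as two words; one extra word is added when branch target
 * logging is enabled (see op_amd_start_ibs()).
 */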

static u32 ibs_caps;

struct ibs_config {
	unsigned long op_enabled;
	unsigned long fetch_enabled;
	unsigned long max_cnt_fetch;
	unsigned long max_cnt_op;
	unsigned long rand_en;
	unsigned long dispatched_ops;
	unsigned long branch_target;
};

struct ibs_state {
	u64		ibs_op_ctl;
	int		branch_target;
	unsigned long	sample_size;
};

static struct ibs_config ibs_config;
static struct ibs_state ibs_state;

/*
 * IBS randomization macros
 */
#define IBS_RANDOM_BITS			12
#define IBS_RANDOM_MASK			((1ULL << IBS_RANDOM_BITS) - 1)
#define IBS_RANDOM_MAXCNT_OFFSET	(1ULL << (IBS_RANDOM_BITS - 5))
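/*
 * Worked values for the macros above:
 *   IBS_RANDOM_MASK          = 0x0fff  (low 12 bits)
 *   IBS_RANDOM_MAXCNT_OFFSET = 0x0080  (1 << 7, half of the +/-128 swing
 *                                       applied to IbsOpMaxCnt when
 *                                       IbsOpCurCnt is not writable)
 */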

/*
 * 16-bit Linear Feedback Shift Register (LFSR)
 *
 * Feedback polynomial = X^16 + X^14 + X^13 + X^11 + 1
 */
static unsigned int lfsr_random(void)
{
	static unsigned int lfsr_value = 0xF00D;
	unsigned int bit;

	/* Compute next bit to shift in */
	bit = ((lfsr_value >> 0) ^
	       (lfsr_value >> 2) ^
	       (lfsr_value >> 3) ^
	       (lfsr_value >> 5)) & 0x0001;

	/* Advance to next register value */
	lfsr_value = (lfsr_value >> 1) | (bit << 15);

	return lfsr_value;
}
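/*
 * Example of a single LFSR step, starting from the 0xF00D seed above:
 *   feedback = bits 0, 2, 3, 5 of 0xF00D = 1 ^ 1 ^ 1 ^ 0 = 1
 *   next value = (0xF00D >> 1) | (1 << 15) = 0xF806
 */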

/*
 * IBS software randomization
 *
 * The IBS periodic op counter is randomized in software. The lower 12
 * bits of the 20 bit counter are randomized. IbsOpCurCnt is
 * initialized with a 12 bit random value.
 */
static inline u64 op_amd_randomize_ibs_op(u64 val)
{
	unsigned int random = lfsr_random();

	if (!(ibs_caps & IBS_CAPS_RDWROPCNT))
		/*
		 * Work around hardware that cannot write to IbsOpCurCnt:
		 *
		 * randomize the lower 8 bits of the 16 bit
		 * IbsOpMaxCnt [15:0] value in the range of -128 to
		 * +127 by adding/subtracting an offset to the
		 * maximum count (IbsOpMaxCnt).
		 *
		 * To avoid over- or underflows and protect upper bits
		 * starting at bit 16, the initial value for
		 * IbsOpMaxCnt must fit in the range from 0x0081 to
		 * 0xff80.
		 */
		val += (s8)(random >> 4);
	else
		val |= (u64)(random & IBS_RANDOM_MASK) << 32;

	return val;
}
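/*
 * Example, assuming a hypothetical lfsr_random() value of 0xabc:
 *   - with IBS_CAPS_RDWROPCNT:  val |= 0xabcULL << 32, seeding IbsOpCurCnt
 *     with a 12-bit random start count;
 *   - without it:  val += (s8)(0xabc >> 4) = (s8)0xab = -85, jittering
 *     IbsOpMaxCnt by a value in [-128, 127].
 */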

static inline void
op_amd_handle_ibs(struct pt_regs * const regs,
		  struct op_msrs const * const msrs)
{
	u64 val, ctl;
	struct op_entry entry;

	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled) {
		rdmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		if (ctl & IBS_FETCH_VAL) {
			rdmsrl(MSR_AMD64_IBSFETCHLINAD, val);
			oprofile_write_reserve(&entry, regs, val,
					       IBS_FETCH_CODE, IBS_FETCH_SIZE);
			oprofile_add_data64(&entry, val);
			oprofile_add_data64(&entry, ctl);
			rdmsrl(MSR_AMD64_IBSFETCHPHYSAD, val);
			oprofile_add_data64(&entry, val);
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT);
			ctl |= IBS_FETCH_ENABLE;
			wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl);
		}
	}

	if (ibs_config.op_enabled) {
		rdmsrl(MSR_AMD64_IBSOPCTL, ctl);
		if (ctl & IBS_OP_VAL) {
			rdmsrl(MSR_AMD64_IBSOPRIP, val);
			oprofile_write_reserve(&entry, regs, val, IBS_OP_CODE,
					       ibs_state.sample_size);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA2, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSOPDATA3, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCLINAD, val);
			oprofile_add_data64(&entry, val);
			rdmsrl(MSR_AMD64_IBSDCPHYSAD, val);
			oprofile_add_data64(&entry, val);
			if (ibs_state.branch_target) {
				rdmsrl(MSR_AMD64_IBSBRTARGET, val);
				oprofile_add_data(&entry, (unsigned long)val);
			}
			oprofile_write_commit(&entry);

			/* reenable the IRQ */
			ctl = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
			wrmsrl(MSR_AMD64_IBSOPCTL, ctl);
		}
	}
}

static inline void op_amd_start_ibs(void)
{
	u64 val;

	if (!ibs_caps)
		return;

	memset(&ibs_state, 0, sizeof(ibs_state));

	/*
	 * Note: Since the max count settings may be out of range, we
	 * write back the actually used values so that userland can
	 * read them.
	 */

	if (ibs_config.fetch_enabled) {
		val = ibs_config.max_cnt_fetch >> 4;
		val = min(val, IBS_FETCH_MAX_CNT);
		ibs_config.max_cnt_fetch = val << 4;
		val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0;
		val |= IBS_FETCH_ENABLE;
		wrmsrl(MSR_AMD64_IBSFETCHCTL, val);
	}

	if (ibs_config.op_enabled) {
		val = ibs_config.max_cnt_op >> 4;
		if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) {
			/*
			 * IbsOpCurCnt not supported. See
			 * op_amd_randomize_ibs_op() for details.
			 */
			val = clamp(val, 0x0081ULL, 0xFF80ULL);
			ibs_config.max_cnt_op = val << 4;
		} else {
			/*
			 * The start value is randomized with a
			 * positive offset, so we need to compensate it
			 * with half of the randomized range. Also
			 * avoid underflows.
			 */
			val += IBS_RANDOM_MAXCNT_OFFSET;
			if (ibs_caps & IBS_CAPS_OPCNTEXT)
				val = min(val, IBS_OP_MAX_CNT_EXT);
			else
				val = min(val, IBS_OP_MAX_CNT);
			ibs_config.max_cnt_op =
				(val - IBS_RANDOM_MAXCNT_OFFSET) << 4;
		}
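		/*
		 * Splice the count into IbsOpCtl: MaxCnt[15:0] stays in
		 * bits 15:0, while any extended count bits
		 * (IBS_CAPS_OPCNTEXT) are shifted up by 4 into their MSR
		 * position above bit 16; e.g. an internal count of
		 * 0x12345 becomes 0x102345.
		 */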
		val = ((val & ~IBS_OP_MAX_CNT) << 4) | (val & IBS_OP_MAX_CNT);
		val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0;
		val |= IBS_OP_ENABLE;
		ibs_state.ibs_op_ctl = val;
		ibs_state.sample_size = IBS_OP_SIZE;
		if (ibs_config.branch_target) {
			ibs_state.branch_target = 1;
			ibs_state.sample_size++;
		}
		val = op_amd_randomize_ibs_op(ibs_state.ibs_op_ctl);
		wrmsrl(MSR_AMD64_IBSOPCTL, val);
	}
}

static void op_amd_stop_ibs(void)
{
	if (!ibs_caps)
		return;

	if (ibs_config.fetch_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSFETCHCTL, 0);

	if (ibs_config.op_enabled)
		/* clear max count and enable */
		wrmsrl(MSR_AMD64_IBSOPCTL, 0);
}

#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX

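/*
 * With event multiplexing, up to NUM_VIRT_COUNTERS virtual events are
 * rotated across the physical counters; op_x86_phys_to_virt() yields the
 * virtual event currently mapped onto a physical counter. This hook only
 * reprograms the event-select MSRs; counter contents are saved and
 * restored by the generic multiplexing code in nmi_int.c.
 */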
static void op_mux_switch_ctrl(struct op_x86_model_spec const *model,
			       struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

#endif

/* functions for op_amd_spec */

static void op_amd_shutdown(struct op_msrs const * const msrs)
{
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!msrs->counters[i].addr)
			continue;
		release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
		release_evntsel_nmi(MSR_K7_EVNTSEL0 + i);
	}
}

static int op_amd_fill_in_addresses(struct op_msrs * const msrs)
{
	int i;

	for (i = 0; i < num_counters; i++) {
		if (!reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i))
			goto fail;
		if (!reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) {
			release_perfctr_nmi(MSR_K7_PERFCTR0 + i);
			goto fail;
		}
		/* both registers must be reserved */
		if (num_counters == AMD64_NUM_COUNTERS_CORE) {
			msrs->counters[i].addr = MSR_F15H_PERF_CTR + (i << 1);
			msrs->controls[i].addr = MSR_F15H_PERF_CTL + (i << 1);
		} else {
			msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i;
			msrs->counters[i].addr = MSR_K7_PERFCTR0 + i;
		}
		continue;
	fail:
		if (!counter_config[i].enabled)
			continue;
		op_x86_warn_reserved(i);
		op_amd_shutdown(msrs);
		return -EBUSY;
	}

	return 0;
}

static void op_amd_setup_ctrs(struct op_x86_model_spec const *model,
			      struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/* setup reset_value */
	for (i = 0; i < OP_MAX_COUNTER; ++i) {
		if (counter_config[i].enabled
		    && msrs->counters[op_x86_virt_to_phys(i)].addr)
			reset_value[i] = counter_config[i].count;
		else
			reset_value[i] = 0;
	}

	/* clear all counters */
	for (i = 0; i < num_counters; ++i) {
		if (!msrs->controls[i].addr)
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		if (val & ARCH_PERFMON_EVENTSEL_ENABLE)
			op_x86_warn_in_use(i);
		val &= model->reserved;
		wrmsrl(msrs->controls[i].addr, val);
		/*
		 * avoid a false detection of ctr overflows in NMI
		 * handler
		 */
		wrmsrl(msrs->counters[i].addr, -1LL);
	}

	/* enable active counters */
	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;

		/* setup counter registers */
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);

		/* setup control registers */
		rdmsrl(msrs->controls[i].addr, val);
		val &= model->reserved;
		val |= op_x86_get_ctrl(model, &counter_config[virt]);
		wrmsrl(msrs->controls[i].addr, val);
	}
}

static int op_amd_check_ctrs(struct pt_regs * const regs,
			     struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		int virt = op_x86_phys_to_virt(i);
		if (!reset_value[virt])
			continue;
		rdmsrl(msrs->counters[i].addr, val);
		/* bit is clear if overflowed: */
		if (val & OP_CTR_OVERFLOW)
			continue;
		oprofile_add_sample(regs, virt);
		wrmsrl(msrs->counters[i].addr, -(u64)reset_value[virt]);
	}

	op_amd_handle_ibs(regs, msrs);

	/* See op_model_ppro.c */
	return 1;
}

static void op_amd_start(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_start_ibs();
}

static void op_amd_stop(struct op_msrs const * const msrs)
{
	u64 val;
	int i;

	/*
	 * Subtle: stop on all counters to avoid race with setting our
	 * pm callback
	 */
	for (i = 0; i < num_counters; ++i) {
		if (!reset_value[op_x86_phys_to_virt(i)])
			continue;
		rdmsrl(msrs->controls[i].addr, val);
		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
		wrmsrl(msrs->controls[i].addr, val);
	}

	op_amd_stop_ibs();
}

/*
 * check and reserve APIC extended interrupt LVT offset for IBS if
 * available
 */

static void init_ibs(void)
{
	ibs_caps = get_ibs_caps();

	if (!ibs_caps)
		return;

	printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", ibs_caps);
}

static int (*create_arch_files)(struct dentry *root);

static int setup_ibs_files(struct dentry *root)
{
	struct dentry *dir;
	int ret = 0;

	/* architecture specific files */
	if (create_arch_files)
		ret = create_arch_files(root);

	if (ret)
		return ret;

	if (!ibs_caps)
		return ret;

	/* model specific files */

	/* setup some reasonable defaults */
	memset(&ibs_config, 0, sizeof(ibs_config));
	ibs_config.max_cnt_fetch = 250000;
	ibs_config.max_cnt_op = 250000;
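	/*
	 * 250000 is just a default sampling period, not a hardware value;
	 * the bottom four bits are discarded when the count is programmed,
	 * since the IBS MaxCnt fields are held in units of 16 (see
	 * op_amd_start_ibs()).
	 */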

	if (ibs_caps & IBS_CAPS_FETCHSAM) {
		dir = oprofilefs_mkdir(root, "ibs_fetch");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.fetch_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_fetch);
		oprofilefs_create_ulong(dir, "rand_enable",
					&ibs_config.rand_en);
	}

	if (ibs_caps & IBS_CAPS_OPSAM) {
		dir = oprofilefs_mkdir(root, "ibs_op");
		oprofilefs_create_ulong(dir, "enable",
					&ibs_config.op_enabled);
		oprofilefs_create_ulong(dir, "max_count",
					&ibs_config.max_cnt_op);
		if (ibs_caps & IBS_CAPS_OPCNT)
			oprofilefs_create_ulong(dir, "dispatched_ops",
						&ibs_config.dispatched_ops);
		if (ibs_caps & IBS_CAPS_BRNTRGT)
			oprofilefs_create_ulong(dir, "branch_target",
						&ibs_config.branch_target);
	}

	return 0;
}

struct op_x86_model_spec op_amd_spec;

static int op_amd_init(struct oprofile_operations *ops)
{
	init_ibs();
	create_arch_files = ops->create_files;
	ops->create_files = setup_ibs_files;

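	/*
	 * Family 15h cores expose six counters via the MSR_F15H_PERF_CTL/CTR
	 * pairs (AMD64_NUM_COUNTERS_CORE); earlier families have four legacy
	 * K7-style counters (AMD64_NUM_COUNTERS).
	 */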
	if (boot_cpu_data.x86 == 0x15) {
		num_counters = AMD64_NUM_COUNTERS_CORE;
	} else {
		num_counters = AMD64_NUM_COUNTERS;
	}

	op_amd_spec.num_counters = num_counters;
	op_amd_spec.num_controls = num_counters;
	op_amd_spec.num_virt_counters = max(num_counters, NUM_VIRT_COUNTERS);

	return 0;
}

struct op_x86_model_spec op_amd_spec = {
	/* num_counters/num_controls filled in at runtime */
	.reserved		= MSR_AMD_EVENTSEL_RESERVED,
	.event_mask		= OP_EVENT_MASK,
	.init			= op_amd_init,
	.fill_in_addresses	= &op_amd_fill_in_addresses,
	.setup_ctrs		= &op_amd_setup_ctrs,
	.check_ctrs		= &op_amd_check_ctrs,
	.start			= &op_amd_start,
	.stop			= &op_amd_stop,
	.shutdown		= &op_amd_shutdown,
#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX
	.switch_ctrl		= &op_mux_switch_ctrl,
#endif
};