xref: /OK3568_Linux_fs/kernel/arch/x86/oprofile/op_model_p4.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /**
2*4882a593Smuzhiyun  * @file op_model_p4.c
3*4882a593Smuzhiyun  * P4 model-specific MSR operations
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * @remark Copyright 2002 OProfile authors
6*4882a593Smuzhiyun  * @remark Read the file COPYING
7*4882a593Smuzhiyun  *
8*4882a593Smuzhiyun  * @author Graydon Hoare
9*4882a593Smuzhiyun  */
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun #include <linux/oprofile.h>
12*4882a593Smuzhiyun #include <linux/smp.h>
13*4882a593Smuzhiyun #include <linux/ptrace.h>
14*4882a593Smuzhiyun #include <asm/nmi.h>
15*4882a593Smuzhiyun #include <asm/msr.h>
16*4882a593Smuzhiyun #include <asm/fixmap.h>
17*4882a593Smuzhiyun #include <asm/apic.h>
18*4882a593Smuzhiyun 
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun #include "op_x86_model.h"
21*4882a593Smuzhiyun #include "op_counter.h"
22*4882a593Smuzhiyun 
/* Number of entries in the p4_events[] table below. */
#define NUM_EVENTS		39

/* Register counts used when one logical CPU owns the full register set. */
#define NUM_COUNTERS_NON_HT	8
#define NUM_ESCRS_NON_HT	45
#define NUM_CCCRS_NON_HT	18
#define NUM_CONTROLS_NON_HT	(NUM_ESCRS_NON_HT + NUM_CCCRS_NON_HT)

/* Register counts selected by setup_num_counters() when smp_num_siblings == 2. */
#define NUM_COUNTERS_HT2	4
#define NUM_ESCRS_HT2		23
#define NUM_CCCRS_HT2		9
#define NUM_CONTROLS_HT2	(NUM_ESCRS_HT2 + NUM_CCCRS_HT2)

/* Bit 31; p4_check_ctrs() tests it against the counter value it reads back. */
#define OP_CTR_OVERFLOW		(1ULL<<31)

/*
 * Counts actually in use.  They start at the non-HT values and are
 * switched to the HT2 values by setup_num_counters().
 */
static unsigned int num_counters = NUM_COUNTERS_NON_HT;
static unsigned int num_controls = NUM_CONTROLS_NON_HT;
39*4882a593Smuzhiyun 
/*
 * Pick the register counts for this machine.  This has to be done at
 * run time because the hyper-threadedness of a chip is only discovered
 * at kernel boot-time.  On SMP builds with exactly two siblings the HT2
 * counts replace the non-HT defaults; otherwise nothing changes.
 */
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
	if (smp_num_siblings != 2)
		return;

	num_counters = NUM_COUNTERS_HT2;
	num_controls = NUM_CONTROLS_HT2;
#endif
}
52*4882a593Smuzhiyun 
/*
 * Step used when walking a range of ESCR MSR addresses: 2 on SMP builds
 * with two siblings (the caller starts at an even or odd address and
 * visits every other register), 1 in every other case.
 */
static inline int addr_increment(void)
{
	int step = 1;

#ifdef CONFIG_SMP
	if (smp_num_siblings == 2)
		step = 2;
#endif
	return step;
}
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun 
/*
 * Tables that present a simplified view of the P4 performance
 * monitoring registers.
 */

/* One hardware counter: its CTR_* bit and the MSRs that drive it. */
struct p4_counter_binding {
	int virt_counter;	/* CTR_* bit identifying this counter */
	int counter_address;	/* MSR address of the counter itself */
	int cccr_address;	/* MSR address of the matching CCCR */
};

/*
 * One event: the selector values to program, plus up to two
 * (counter, ESCR) pairs the event can be counted on.  An unused pair
 * is written as { 0, 0 } in the table.
 */
struct p4_event_binding {
	int escr_select;	/* value to put in CCCR */
	int event_select;	/* value to put in ESCR */
	struct {
		int virt_counter;	/* for this counter (CTR_* bit)... */
		int escr_address;	/* ...use this ESCR MSR */
	} bindings[2];
};
78*4882a593Smuzhiyun 
/*
 * One bit per hardware counter, in the same order as the p4_counters[]
 * table.  nb: these CTR_* defines are a duplicate of defines in
 * event/i386.p4*events.
 */
#define CTR_BPU_0	(1 << 0)
#define CTR_MS_0	(1 << 1)
#define CTR_FLAME_0	(1 << 2)
#define CTR_IQ_4	(1 << 3)
#define CTR_BPU_2	(1 << 4)
#define CTR_MS_2	(1 << 5)
#define CTR_FLAME_2	(1 << 6)
#define CTR_IQ_5	(1 << 7)
91*4882a593Smuzhiyun 
92*4882a593Smuzhiyun static struct p4_counter_binding p4_counters[NUM_COUNTERS_NON_HT] = {
93*4882a593Smuzhiyun 	{ CTR_BPU_0,   MSR_P4_BPU_PERFCTR0,   MSR_P4_BPU_CCCR0 },
94*4882a593Smuzhiyun 	{ CTR_MS_0,    MSR_P4_MS_PERFCTR0,    MSR_P4_MS_CCCR0 },
95*4882a593Smuzhiyun 	{ CTR_FLAME_0, MSR_P4_FLAME_PERFCTR0, MSR_P4_FLAME_CCCR0 },
96*4882a593Smuzhiyun 	{ CTR_IQ_4,    MSR_P4_IQ_PERFCTR4,    MSR_P4_IQ_CCCR4 },
97*4882a593Smuzhiyun 	{ CTR_BPU_2,   MSR_P4_BPU_PERFCTR2,   MSR_P4_BPU_CCCR2 },
98*4882a593Smuzhiyun 	{ CTR_MS_2,    MSR_P4_MS_PERFCTR2,    MSR_P4_MS_CCCR2 },
99*4882a593Smuzhiyun 	{ CTR_FLAME_2, MSR_P4_FLAME_PERFCTR2, MSR_P4_FLAME_CCCR2 },
100*4882a593Smuzhiyun 	{ CTR_IQ_5,    MSR_P4_IQ_PERFCTR5,    MSR_P4_IQ_CCCR5 }
101*4882a593Smuzhiyun };
102*4882a593Smuzhiyun 
103*4882a593Smuzhiyun #define NUM_UNUSED_CCCRS (NUM_CCCRS_NON_HT - NUM_COUNTERS_NON_HT)
104*4882a593Smuzhiyun 
105*4882a593Smuzhiyun /* p4 event codes in libop/op_event.h are indices into this table. */
106*4882a593Smuzhiyun 
107*4882a593Smuzhiyun static struct p4_event_binding p4_events[NUM_EVENTS] = {
108*4882a593Smuzhiyun 
109*4882a593Smuzhiyun 	{ /* BRANCH_RETIRED */
110*4882a593Smuzhiyun 		0x05, 0x06,
111*4882a593Smuzhiyun 		{ {CTR_IQ_4, MSR_P4_CRU_ESCR2},
112*4882a593Smuzhiyun 		  {CTR_IQ_5, MSR_P4_CRU_ESCR3} }
113*4882a593Smuzhiyun 	},
114*4882a593Smuzhiyun 
115*4882a593Smuzhiyun 	{ /* MISPRED_BRANCH_RETIRED */
116*4882a593Smuzhiyun 		0x04, 0x03,
117*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
118*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
119*4882a593Smuzhiyun 	},
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun 	{ /* TC_DELIVER_MODE */
122*4882a593Smuzhiyun 		0x01, 0x01,
123*4882a593Smuzhiyun 		{ { CTR_MS_0, MSR_P4_TC_ESCR0},
124*4882a593Smuzhiyun 		  { CTR_MS_2, MSR_P4_TC_ESCR1} }
125*4882a593Smuzhiyun 	},
126*4882a593Smuzhiyun 
127*4882a593Smuzhiyun 	{ /* BPU_FETCH_REQUEST */
128*4882a593Smuzhiyun 		0x00, 0x03,
129*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_BPU_ESCR0},
130*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_BPU_ESCR1} }
131*4882a593Smuzhiyun 	},
132*4882a593Smuzhiyun 
133*4882a593Smuzhiyun 	{ /* ITLB_REFERENCE */
134*4882a593Smuzhiyun 		0x03, 0x18,
135*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_ITLB_ESCR0},
136*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_ITLB_ESCR1} }
137*4882a593Smuzhiyun 	},
138*4882a593Smuzhiyun 
139*4882a593Smuzhiyun 	{ /* MEMORY_CANCEL */
140*4882a593Smuzhiyun 		0x05, 0x02,
141*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_DAC_ESCR0},
142*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_DAC_ESCR1} }
143*4882a593Smuzhiyun 	},
144*4882a593Smuzhiyun 
145*4882a593Smuzhiyun 	{ /* MEMORY_COMPLETE */
146*4882a593Smuzhiyun 		0x02, 0x08,
147*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
148*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
149*4882a593Smuzhiyun 	},
150*4882a593Smuzhiyun 
151*4882a593Smuzhiyun 	{ /* LOAD_PORT_REPLAY */
152*4882a593Smuzhiyun 		0x02, 0x04,
153*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
154*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
155*4882a593Smuzhiyun 	},
156*4882a593Smuzhiyun 
157*4882a593Smuzhiyun 	{ /* STORE_PORT_REPLAY */
158*4882a593Smuzhiyun 		0x02, 0x05,
159*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_SAAT_ESCR0},
160*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_SAAT_ESCR1} }
161*4882a593Smuzhiyun 	},
162*4882a593Smuzhiyun 
163*4882a593Smuzhiyun 	{ /* MOB_LOAD_REPLAY */
164*4882a593Smuzhiyun 		0x02, 0x03,
165*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_MOB_ESCR0},
166*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_MOB_ESCR1} }
167*4882a593Smuzhiyun 	},
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun 	{ /* PAGE_WALK_TYPE */
170*4882a593Smuzhiyun 		0x04, 0x01,
171*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_PMH_ESCR0},
172*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_PMH_ESCR1} }
173*4882a593Smuzhiyun 	},
174*4882a593Smuzhiyun 
175*4882a593Smuzhiyun 	{ /* BSQ_CACHE_REFERENCE */
176*4882a593Smuzhiyun 		0x07, 0x0c,
177*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
178*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_BSU_ESCR1} }
179*4882a593Smuzhiyun 	},
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	{ /* IOQ_ALLOCATION */
182*4882a593Smuzhiyun 		0x06, 0x03,
183*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
184*4882a593Smuzhiyun 		  { 0, 0 } }
185*4882a593Smuzhiyun 	},
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 	{ /* IOQ_ACTIVE_ENTRIES */
188*4882a593Smuzhiyun 		0x06, 0x1a,
189*4882a593Smuzhiyun 		{ { CTR_BPU_2, MSR_P4_FSB_ESCR1},
190*4882a593Smuzhiyun 		  { 0, 0 } }
191*4882a593Smuzhiyun 	},
192*4882a593Smuzhiyun 
193*4882a593Smuzhiyun 	{ /* FSB_DATA_ACTIVITY */
194*4882a593Smuzhiyun 		0x06, 0x17,
195*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
196*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
197*4882a593Smuzhiyun 	},
198*4882a593Smuzhiyun 
199*4882a593Smuzhiyun 	{ /* BSQ_ALLOCATION */
200*4882a593Smuzhiyun 		0x07, 0x05,
201*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_BSU_ESCR0},
202*4882a593Smuzhiyun 		  { 0, 0 } }
203*4882a593Smuzhiyun 	},
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun 	{ /* BSQ_ACTIVE_ENTRIES */
206*4882a593Smuzhiyun 		0x07, 0x06,
207*4882a593Smuzhiyun 		{ { CTR_BPU_2, MSR_P4_BSU_ESCR1 /* guess */},
208*4882a593Smuzhiyun 		  { 0, 0 } }
209*4882a593Smuzhiyun 	},
210*4882a593Smuzhiyun 
211*4882a593Smuzhiyun 	{ /* X87_ASSIST */
212*4882a593Smuzhiyun 		0x05, 0x03,
213*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
214*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
215*4882a593Smuzhiyun 	},
216*4882a593Smuzhiyun 
217*4882a593Smuzhiyun 	{ /* SSE_INPUT_ASSIST */
218*4882a593Smuzhiyun 		0x01, 0x34,
219*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
220*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
221*4882a593Smuzhiyun 	},
222*4882a593Smuzhiyun 
223*4882a593Smuzhiyun 	{ /* PACKED_SP_UOP */
224*4882a593Smuzhiyun 		0x01, 0x08,
225*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
226*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
227*4882a593Smuzhiyun 	},
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun 	{ /* PACKED_DP_UOP */
230*4882a593Smuzhiyun 		0x01, 0x0c,
231*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
232*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
233*4882a593Smuzhiyun 	},
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun 	{ /* SCALAR_SP_UOP */
236*4882a593Smuzhiyun 		0x01, 0x0a,
237*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
238*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
239*4882a593Smuzhiyun 	},
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	{ /* SCALAR_DP_UOP */
242*4882a593Smuzhiyun 		0x01, 0x0e,
243*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
244*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
245*4882a593Smuzhiyun 	},
246*4882a593Smuzhiyun 
247*4882a593Smuzhiyun 	{ /* 64BIT_MMX_UOP */
248*4882a593Smuzhiyun 		0x01, 0x02,
249*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
250*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
251*4882a593Smuzhiyun 	},
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 	{ /* 128BIT_MMX_UOP */
254*4882a593Smuzhiyun 		0x01, 0x1a,
255*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
256*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
257*4882a593Smuzhiyun 	},
258*4882a593Smuzhiyun 
259*4882a593Smuzhiyun 	{ /* X87_FP_UOP */
260*4882a593Smuzhiyun 		0x01, 0x04,
261*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
262*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
263*4882a593Smuzhiyun 	},
264*4882a593Smuzhiyun 
265*4882a593Smuzhiyun 	{ /* X87_SIMD_MOVES_UOP */
266*4882a593Smuzhiyun 		0x01, 0x2e,
267*4882a593Smuzhiyun 		{ { CTR_FLAME_0, MSR_P4_FIRM_ESCR0},
268*4882a593Smuzhiyun 		  { CTR_FLAME_2, MSR_P4_FIRM_ESCR1} }
269*4882a593Smuzhiyun 	},
270*4882a593Smuzhiyun 
271*4882a593Smuzhiyun 	{ /* MACHINE_CLEAR */
272*4882a593Smuzhiyun 		0x05, 0x02,
273*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
274*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
275*4882a593Smuzhiyun 	},
276*4882a593Smuzhiyun 
277*4882a593Smuzhiyun 	{ /* GLOBAL_POWER_EVENTS */
278*4882a593Smuzhiyun 		0x06, 0x13 /* older manual says 0x05, newer 0x13 */,
279*4882a593Smuzhiyun 		{ { CTR_BPU_0, MSR_P4_FSB_ESCR0},
280*4882a593Smuzhiyun 		  { CTR_BPU_2, MSR_P4_FSB_ESCR1} }
281*4882a593Smuzhiyun 	},
282*4882a593Smuzhiyun 
283*4882a593Smuzhiyun 	{ /* TC_MS_XFER */
284*4882a593Smuzhiyun 		0x00, 0x05,
285*4882a593Smuzhiyun 		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
286*4882a593Smuzhiyun 		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
287*4882a593Smuzhiyun 	},
288*4882a593Smuzhiyun 
289*4882a593Smuzhiyun 	{ /* UOP_QUEUE_WRITES */
290*4882a593Smuzhiyun 		0x00, 0x09,
291*4882a593Smuzhiyun 		{ { CTR_MS_0, MSR_P4_MS_ESCR0},
292*4882a593Smuzhiyun 		  { CTR_MS_2, MSR_P4_MS_ESCR1} }
293*4882a593Smuzhiyun 	},
294*4882a593Smuzhiyun 
295*4882a593Smuzhiyun 	{ /* FRONT_END_EVENT */
296*4882a593Smuzhiyun 		0x05, 0x08,
297*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
298*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
299*4882a593Smuzhiyun 	},
300*4882a593Smuzhiyun 
301*4882a593Smuzhiyun 	{ /* EXECUTION_EVENT */
302*4882a593Smuzhiyun 		0x05, 0x0c,
303*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
304*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
305*4882a593Smuzhiyun 	},
306*4882a593Smuzhiyun 
307*4882a593Smuzhiyun 	{ /* REPLAY_EVENT */
308*4882a593Smuzhiyun 		0x05, 0x09,
309*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR2},
310*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR3} }
311*4882a593Smuzhiyun 	},
312*4882a593Smuzhiyun 
313*4882a593Smuzhiyun 	{ /* INSTR_RETIRED */
314*4882a593Smuzhiyun 		0x04, 0x02,
315*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
316*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
317*4882a593Smuzhiyun 	},
318*4882a593Smuzhiyun 
319*4882a593Smuzhiyun 	{ /* UOPS_RETIRED */
320*4882a593Smuzhiyun 		0x04, 0x01,
321*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_CRU_ESCR0},
322*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_CRU_ESCR1} }
323*4882a593Smuzhiyun 	},
324*4882a593Smuzhiyun 
325*4882a593Smuzhiyun 	{ /* UOP_TYPE */
326*4882a593Smuzhiyun 		0x02, 0x02,
327*4882a593Smuzhiyun 		{ { CTR_IQ_4, MSR_P4_RAT_ESCR0},
328*4882a593Smuzhiyun 		  { CTR_IQ_5, MSR_P4_RAT_ESCR1} }
329*4882a593Smuzhiyun 	},
330*4882a593Smuzhiyun 
331*4882a593Smuzhiyun 	{ /* RETIRED_MISPRED_BRANCH_TYPE */
332*4882a593Smuzhiyun 		0x02, 0x05,
333*4882a593Smuzhiyun 		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
334*4882a593Smuzhiyun 		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
335*4882a593Smuzhiyun 	},
336*4882a593Smuzhiyun 
337*4882a593Smuzhiyun 	{ /* RETIRED_BRANCH_TYPE */
338*4882a593Smuzhiyun 		0x02, 0x04,
339*4882a593Smuzhiyun 		{ { CTR_MS_0, MSR_P4_TBPU_ESCR0},
340*4882a593Smuzhiyun 		  { CTR_MS_2, MSR_P4_TBPU_ESCR1} }
341*4882a593Smuzhiyun 	}
342*4882a593Smuzhiyun };
343*4882a593Smuzhiyun 
344*4882a593Smuzhiyun 
/*
 * Non-zero when bit 7 of x is set.  p4_setup_ctrs() applies it to the
 * low word of MSR_IA32_MISC_ENABLE to decide whether the PMC is usable.
 */
#define MISC_PMC_ENABLED_P(x)		((x) & (1 << 7))

/*
 * ESCR helpers.  Every macro updates its first argument in place.
 * ESCR_CLEAR() keeps only the bits named by ESCR_RESERVED_BITS; the
 * SET macros OR a masked field into position:
 *   USR_0 -> bit 2, OS_0 -> bit 3, USR_1 -> bit 0, OS_1 -> bit 1,
 *   event select -> 6 bits at bit 25, event mask -> 16 bits at bit 9.
 */
#define ESCR_RESERVED_BITS		0x80000003
#define ESCR_CLEAR(escr)		((escr) &= ESCR_RESERVED_BITS)
#define ESCR_SET_USR_0(escr, usr)	((escr) |= (((usr) & 1) << 2))
#define ESCR_SET_OS_0(escr, os)		((escr) |= (((os) & 1) << 3))
#define ESCR_SET_USR_1(escr, usr)	((escr) |= ((usr) & 1))
#define ESCR_SET_OS_1(escr, os)		((escr) |= (((os) & 1) << 1))
#define ESCR_SET_EVENT_SELECT(escr, sel) \
	((escr) |= (((sel) & 0x3f) << 25))
#define ESCR_SET_EVENT_MASK(escr, mask) \
	((escr) |= (((mask) & 0xffff) << 9))

/*
 * CCCR helpers, same in-place convention.  CCCR_CLEAR() keeps only the
 * bits named by CCCR_RESERVED_BITS.  Bit positions used:
 *   enable -> bit 12, ESCR select -> 3 bits at bit 13,
 *   required bits -> bits 16-17, PMI on overflow -> bit 26 (_0) or
 *   bit 27 (_1), overflow flag -> bit 31.
 */
#define CCCR_RESERVED_BITS		0x38030FFF
#define CCCR_CLEAR(cccr)		((cccr) &= CCCR_RESERVED_BITS)
#define CCCR_SET_REQUIRED_BITS(cccr)	((cccr) |= 0x00030000)
#define CCCR_SET_ESCR_SELECT(cccr, sel)	((cccr) |= (((sel) & 0x07) << 13))
#define CCCR_SET_PMI_OVF_0(cccr)	((cccr) |= (1 << 26))
#define CCCR_SET_PMI_OVF_1(cccr)	((cccr) |= (1 << 27))
#define CCCR_SET_ENABLE(cccr)		((cccr) |= (1 << 12))
#define CCCR_SET_DISABLE(cccr)		((cccr) &= ~(1 << 12))
#define CCCR_OVF_P(cccr)		((cccr) & (1U << 31))
#define CCCR_CLEAR_OVF(cccr)		((cccr) &= (~(1U << 31)))
366*4882a593Smuzhiyun 
367*4882a593Smuzhiyun 
/*
 * Compute the "stagger" of the current CPU.  It is used throughout this
 * module as an extra array offset that selects the "even" or "odd" part
 * of all the divided resources.
 *
 * On SMP builds the result is 0 when the current CPU is the first CPU
 * in its sibling map and 1 otherwise; on non-SMP builds it is always 0.
 */
static unsigned int get_stagger(void)
{
#ifdef CONFIG_SMP
	const int cpu = smp_processor_id();

	if (cpu == cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)))
		return 0;
	return 1;
#else
	return 0;
#endif
}
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 
381*4882a593Smuzhiyun /* finally, mediate access to a real hardware counter
382*4882a593Smuzhiyun    by passing a "virtual" counter numer to this macro,
383*4882a593Smuzhiyun    along with your stagger setting. */
384*4882a593Smuzhiyun #define VIRT_CTR(stagger, i) ((i) + ((num_counters) * (stagger)))
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun static unsigned long reset_value[NUM_COUNTERS_NON_HT];
387*4882a593Smuzhiyun 
p4_shutdown(struct op_msrs const * const msrs)388*4882a593Smuzhiyun static void p4_shutdown(struct op_msrs const * const msrs)
389*4882a593Smuzhiyun {
390*4882a593Smuzhiyun 	int i;
391*4882a593Smuzhiyun 
392*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
393*4882a593Smuzhiyun 		if (msrs->counters[i].addr)
394*4882a593Smuzhiyun 			release_perfctr_nmi(msrs->counters[i].addr);
395*4882a593Smuzhiyun 	}
396*4882a593Smuzhiyun 	/*
397*4882a593Smuzhiyun 	 * some of the control registers are specially reserved in
398*4882a593Smuzhiyun 	 * conjunction with the counter registers (hence the starting offset).
399*4882a593Smuzhiyun 	 * This saves a few bits.
400*4882a593Smuzhiyun 	 */
401*4882a593Smuzhiyun 	for (i = num_counters; i < num_controls; ++i) {
402*4882a593Smuzhiyun 		if (msrs->controls[i].addr)
403*4882a593Smuzhiyun 			release_evntsel_nmi(msrs->controls[i].addr);
404*4882a593Smuzhiyun 	}
405*4882a593Smuzhiyun }
406*4882a593Smuzhiyun 
p4_fill_in_addresses(struct op_msrs * const msrs)407*4882a593Smuzhiyun static int p4_fill_in_addresses(struct op_msrs * const msrs)
408*4882a593Smuzhiyun {
409*4882a593Smuzhiyun 	unsigned int i;
410*4882a593Smuzhiyun 	unsigned int addr, cccraddr, stag;
411*4882a593Smuzhiyun 
412*4882a593Smuzhiyun 	setup_num_counters();
413*4882a593Smuzhiyun 	stag = get_stagger();
414*4882a593Smuzhiyun 
415*4882a593Smuzhiyun 	/* the counter & cccr registers we pay attention to */
416*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
417*4882a593Smuzhiyun 		addr = p4_counters[VIRT_CTR(stag, i)].counter_address;
418*4882a593Smuzhiyun 		cccraddr = p4_counters[VIRT_CTR(stag, i)].cccr_address;
419*4882a593Smuzhiyun 		if (reserve_perfctr_nmi(addr)) {
420*4882a593Smuzhiyun 			msrs->counters[i].addr = addr;
421*4882a593Smuzhiyun 			msrs->controls[i].addr = cccraddr;
422*4882a593Smuzhiyun 		}
423*4882a593Smuzhiyun 	}
424*4882a593Smuzhiyun 
425*4882a593Smuzhiyun 	/* 43 ESCR registers in three or four discontiguous group */
426*4882a593Smuzhiyun 	for (addr = MSR_P4_BSU_ESCR0 + stag;
427*4882a593Smuzhiyun 	     addr < MSR_P4_IQ_ESCR0; ++i, addr += addr_increment()) {
428*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(addr))
429*4882a593Smuzhiyun 			msrs->controls[i].addr = addr;
430*4882a593Smuzhiyun 	}
431*4882a593Smuzhiyun 
432*4882a593Smuzhiyun 	/* no IQ_ESCR0/1 on some models, we save a seconde time BSU_ESCR0/1
433*4882a593Smuzhiyun 	 * to avoid special case in nmi_{save|restore}_registers() */
434*4882a593Smuzhiyun 	if (boot_cpu_data.x86_model >= 0x3) {
435*4882a593Smuzhiyun 		for (addr = MSR_P4_BSU_ESCR0 + stag;
436*4882a593Smuzhiyun 		     addr <= MSR_P4_BSU_ESCR1; ++i, addr += addr_increment()) {
437*4882a593Smuzhiyun 			if (reserve_evntsel_nmi(addr))
438*4882a593Smuzhiyun 				msrs->controls[i].addr = addr;
439*4882a593Smuzhiyun 		}
440*4882a593Smuzhiyun 	} else {
441*4882a593Smuzhiyun 		for (addr = MSR_P4_IQ_ESCR0 + stag;
442*4882a593Smuzhiyun 		     addr <= MSR_P4_IQ_ESCR1; ++i, addr += addr_increment()) {
443*4882a593Smuzhiyun 			if (reserve_evntsel_nmi(addr))
444*4882a593Smuzhiyun 				msrs->controls[i].addr = addr;
445*4882a593Smuzhiyun 		}
446*4882a593Smuzhiyun 	}
447*4882a593Smuzhiyun 
448*4882a593Smuzhiyun 	for (addr = MSR_P4_RAT_ESCR0 + stag;
449*4882a593Smuzhiyun 	     addr <= MSR_P4_SSU_ESCR0; ++i, addr += addr_increment()) {
450*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(addr))
451*4882a593Smuzhiyun 			msrs->controls[i].addr = addr;
452*4882a593Smuzhiyun 	}
453*4882a593Smuzhiyun 
454*4882a593Smuzhiyun 	for (addr = MSR_P4_MS_ESCR0 + stag;
455*4882a593Smuzhiyun 	     addr <= MSR_P4_TC_ESCR1; ++i, addr += addr_increment()) {
456*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(addr))
457*4882a593Smuzhiyun 			msrs->controls[i].addr = addr;
458*4882a593Smuzhiyun 	}
459*4882a593Smuzhiyun 
460*4882a593Smuzhiyun 	for (addr = MSR_P4_IX_ESCR0 + stag;
461*4882a593Smuzhiyun 	     addr <= MSR_P4_CRU_ESCR3; ++i, addr += addr_increment()) {
462*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(addr))
463*4882a593Smuzhiyun 			msrs->controls[i].addr = addr;
464*4882a593Smuzhiyun 	}
465*4882a593Smuzhiyun 
466*4882a593Smuzhiyun 	/* there are 2 remaining non-contiguously located ESCRs */
467*4882a593Smuzhiyun 
468*4882a593Smuzhiyun 	if (num_counters == NUM_COUNTERS_NON_HT) {
469*4882a593Smuzhiyun 		/* standard non-HT CPUs handle both remaining ESCRs*/
470*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5))
471*4882a593Smuzhiyun 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
472*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
473*4882a593Smuzhiyun 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
474*4882a593Smuzhiyun 
475*4882a593Smuzhiyun 	} else if (stag == 0) {
476*4882a593Smuzhiyun 		/* HT CPUs give the first remainder to the even thread, as
477*4882a593Smuzhiyun 		   the 32nd control register */
478*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR4))
479*4882a593Smuzhiyun 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR4;
480*4882a593Smuzhiyun 
481*4882a593Smuzhiyun 	} else {
482*4882a593Smuzhiyun 		/* and two copies of the second to the odd thread,
483*4882a593Smuzhiyun 		   for the 22st and 23nd control registers */
484*4882a593Smuzhiyun 		if (reserve_evntsel_nmi(MSR_P4_CRU_ESCR5)) {
485*4882a593Smuzhiyun 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
486*4882a593Smuzhiyun 			msrs->controls[i++].addr = MSR_P4_CRU_ESCR5;
487*4882a593Smuzhiyun 		}
488*4882a593Smuzhiyun 	}
489*4882a593Smuzhiyun 
490*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
491*4882a593Smuzhiyun 		if (!counter_config[i].enabled)
492*4882a593Smuzhiyun 			continue;
493*4882a593Smuzhiyun 		if (msrs->controls[i].addr)
494*4882a593Smuzhiyun 			continue;
495*4882a593Smuzhiyun 		op_x86_warn_reserved(i);
496*4882a593Smuzhiyun 		p4_shutdown(msrs);
497*4882a593Smuzhiyun 		return -EBUSY;
498*4882a593Smuzhiyun 	}
499*4882a593Smuzhiyun 
500*4882a593Smuzhiyun 	return 0;
501*4882a593Smuzhiyun }
502*4882a593Smuzhiyun 
503*4882a593Smuzhiyun 
pmc_setup_one_p4_counter(unsigned int ctr)504*4882a593Smuzhiyun static void pmc_setup_one_p4_counter(unsigned int ctr)
505*4882a593Smuzhiyun {
506*4882a593Smuzhiyun 	int i;
507*4882a593Smuzhiyun 	int const maxbind = 2;
508*4882a593Smuzhiyun 	unsigned int cccr = 0;
509*4882a593Smuzhiyun 	unsigned int escr = 0;
510*4882a593Smuzhiyun 	unsigned int high = 0;
511*4882a593Smuzhiyun 	unsigned int counter_bit;
512*4882a593Smuzhiyun 	struct p4_event_binding *ev = NULL;
513*4882a593Smuzhiyun 	unsigned int stag;
514*4882a593Smuzhiyun 
515*4882a593Smuzhiyun 	stag = get_stagger();
516*4882a593Smuzhiyun 
517*4882a593Smuzhiyun 	/* convert from counter *number* to counter *bit* */
518*4882a593Smuzhiyun 	counter_bit = 1 << VIRT_CTR(stag, ctr);
519*4882a593Smuzhiyun 
520*4882a593Smuzhiyun 	/* find our event binding structure. */
521*4882a593Smuzhiyun 	if (counter_config[ctr].event <= 0 || counter_config[ctr].event > NUM_EVENTS) {
522*4882a593Smuzhiyun 		printk(KERN_ERR
523*4882a593Smuzhiyun 		       "oprofile: P4 event code 0x%lx out of range\n",
524*4882a593Smuzhiyun 		       counter_config[ctr].event);
525*4882a593Smuzhiyun 		return;
526*4882a593Smuzhiyun 	}
527*4882a593Smuzhiyun 
528*4882a593Smuzhiyun 	ev = &(p4_events[counter_config[ctr].event - 1]);
529*4882a593Smuzhiyun 
530*4882a593Smuzhiyun 	for (i = 0; i < maxbind; i++) {
531*4882a593Smuzhiyun 		if (ev->bindings[i].virt_counter & counter_bit) {
532*4882a593Smuzhiyun 
533*4882a593Smuzhiyun 			/* modify ESCR */
534*4882a593Smuzhiyun 			rdmsr(ev->bindings[i].escr_address, escr, high);
535*4882a593Smuzhiyun 			ESCR_CLEAR(escr);
536*4882a593Smuzhiyun 			if (stag == 0) {
537*4882a593Smuzhiyun 				ESCR_SET_USR_0(escr, counter_config[ctr].user);
538*4882a593Smuzhiyun 				ESCR_SET_OS_0(escr, counter_config[ctr].kernel);
539*4882a593Smuzhiyun 			} else {
540*4882a593Smuzhiyun 				ESCR_SET_USR_1(escr, counter_config[ctr].user);
541*4882a593Smuzhiyun 				ESCR_SET_OS_1(escr, counter_config[ctr].kernel);
542*4882a593Smuzhiyun 			}
543*4882a593Smuzhiyun 			ESCR_SET_EVENT_SELECT(escr, ev->event_select);
544*4882a593Smuzhiyun 			ESCR_SET_EVENT_MASK(escr, counter_config[ctr].unit_mask);
545*4882a593Smuzhiyun 			wrmsr(ev->bindings[i].escr_address, escr, high);
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun 			/* modify CCCR */
548*4882a593Smuzhiyun 			rdmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
549*4882a593Smuzhiyun 			      cccr, high);
550*4882a593Smuzhiyun 			CCCR_CLEAR(cccr);
551*4882a593Smuzhiyun 			CCCR_SET_REQUIRED_BITS(cccr);
552*4882a593Smuzhiyun 			CCCR_SET_ESCR_SELECT(cccr, ev->escr_select);
553*4882a593Smuzhiyun 			if (stag == 0)
554*4882a593Smuzhiyun 				CCCR_SET_PMI_OVF_0(cccr);
555*4882a593Smuzhiyun 			else
556*4882a593Smuzhiyun 				CCCR_SET_PMI_OVF_1(cccr);
557*4882a593Smuzhiyun 			wrmsr(p4_counters[VIRT_CTR(stag, ctr)].cccr_address,
558*4882a593Smuzhiyun 			      cccr, high);
559*4882a593Smuzhiyun 			return;
560*4882a593Smuzhiyun 		}
561*4882a593Smuzhiyun 	}
562*4882a593Smuzhiyun 
563*4882a593Smuzhiyun 	printk(KERN_ERR
564*4882a593Smuzhiyun 	       "oprofile: P4 event code 0x%lx no binding, stag %d ctr %d\n",
565*4882a593Smuzhiyun 	       counter_config[ctr].event, stag, ctr);
566*4882a593Smuzhiyun }
567*4882a593Smuzhiyun 
568*4882a593Smuzhiyun 
p4_setup_ctrs(struct op_x86_model_spec const * model,struct op_msrs const * const msrs)569*4882a593Smuzhiyun static void p4_setup_ctrs(struct op_x86_model_spec const *model,
570*4882a593Smuzhiyun 			  struct op_msrs const * const msrs)
571*4882a593Smuzhiyun {
572*4882a593Smuzhiyun 	unsigned int i;
573*4882a593Smuzhiyun 	unsigned int low, high;
574*4882a593Smuzhiyun 	unsigned int stag;
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun 	stag = get_stagger();
577*4882a593Smuzhiyun 
578*4882a593Smuzhiyun 	rdmsr(MSR_IA32_MISC_ENABLE, low, high);
579*4882a593Smuzhiyun 	if (!MISC_PMC_ENABLED_P(low)) {
580*4882a593Smuzhiyun 		printk(KERN_ERR "oprofile: P4 PMC not available\n");
581*4882a593Smuzhiyun 		return;
582*4882a593Smuzhiyun 	}
583*4882a593Smuzhiyun 
584*4882a593Smuzhiyun 	/* clear the cccrs we will use */
585*4882a593Smuzhiyun 	for (i = 0; i < num_counters; i++) {
586*4882a593Smuzhiyun 		if (unlikely(!msrs->controls[i].addr))
587*4882a593Smuzhiyun 			continue;
588*4882a593Smuzhiyun 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
589*4882a593Smuzhiyun 		CCCR_CLEAR(low);
590*4882a593Smuzhiyun 		CCCR_SET_REQUIRED_BITS(low);
591*4882a593Smuzhiyun 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
592*4882a593Smuzhiyun 	}
593*4882a593Smuzhiyun 
594*4882a593Smuzhiyun 	/* clear all escrs (including those outside our concern) */
595*4882a593Smuzhiyun 	for (i = num_counters; i < num_controls; i++) {
596*4882a593Smuzhiyun 		if (unlikely(!msrs->controls[i].addr))
597*4882a593Smuzhiyun 			continue;
598*4882a593Smuzhiyun 		wrmsr(msrs->controls[i].addr, 0, 0);
599*4882a593Smuzhiyun 	}
600*4882a593Smuzhiyun 
601*4882a593Smuzhiyun 	/* setup all counters */
602*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
603*4882a593Smuzhiyun 		if (counter_config[i].enabled && msrs->controls[i].addr) {
604*4882a593Smuzhiyun 			reset_value[i] = counter_config[i].count;
605*4882a593Smuzhiyun 			pmc_setup_one_p4_counter(i);
606*4882a593Smuzhiyun 			wrmsrl(p4_counters[VIRT_CTR(stag, i)].counter_address,
607*4882a593Smuzhiyun 			       -(u64)counter_config[i].count);
608*4882a593Smuzhiyun 		} else {
609*4882a593Smuzhiyun 			reset_value[i] = 0;
610*4882a593Smuzhiyun 		}
611*4882a593Smuzhiyun 	}
612*4882a593Smuzhiyun }
613*4882a593Smuzhiyun 
614*4882a593Smuzhiyun 
p4_check_ctrs(struct pt_regs * const regs,struct op_msrs const * const msrs)615*4882a593Smuzhiyun static int p4_check_ctrs(struct pt_regs * const regs,
616*4882a593Smuzhiyun 			 struct op_msrs const * const msrs)
617*4882a593Smuzhiyun {
618*4882a593Smuzhiyun 	unsigned long ctr, low, high, stag, real;
619*4882a593Smuzhiyun 	int i;
620*4882a593Smuzhiyun 
621*4882a593Smuzhiyun 	stag = get_stagger();
622*4882a593Smuzhiyun 
623*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
624*4882a593Smuzhiyun 
625*4882a593Smuzhiyun 		if (!reset_value[i])
626*4882a593Smuzhiyun 			continue;
627*4882a593Smuzhiyun 
628*4882a593Smuzhiyun 		/*
629*4882a593Smuzhiyun 		 * there is some eccentricity in the hardware which
630*4882a593Smuzhiyun 		 * requires that we perform 2 extra corrections:
631*4882a593Smuzhiyun 		 *
632*4882a593Smuzhiyun 		 * - check both the CCCR:OVF flag for overflow and the
633*4882a593Smuzhiyun 		 *   counter high bit for un-flagged overflows.
634*4882a593Smuzhiyun 		 *
635*4882a593Smuzhiyun 		 * - write the counter back twice to ensure it gets
636*4882a593Smuzhiyun 		 *   updated properly.
637*4882a593Smuzhiyun 		 *
638*4882a593Smuzhiyun 		 * the former seems to be related to extra NMIs happening
639*4882a593Smuzhiyun 		 * during the current NMI; the latter is reported as errata
640*4882a593Smuzhiyun 		 * N15 in intel doc 249199-029, pentium 4 specification
641*4882a593Smuzhiyun 		 * update, though their suggested work-around does not
642*4882a593Smuzhiyun 		 * appear to solve the problem.
643*4882a593Smuzhiyun 		 */
644*4882a593Smuzhiyun 
645*4882a593Smuzhiyun 		real = VIRT_CTR(stag, i);
646*4882a593Smuzhiyun 
647*4882a593Smuzhiyun 		rdmsr(p4_counters[real].cccr_address, low, high);
648*4882a593Smuzhiyun 		rdmsr(p4_counters[real].counter_address, ctr, high);
649*4882a593Smuzhiyun 		if (CCCR_OVF_P(low) || !(ctr & OP_CTR_OVERFLOW)) {
650*4882a593Smuzhiyun 			oprofile_add_sample(regs, i);
651*4882a593Smuzhiyun 			wrmsrl(p4_counters[real].counter_address,
652*4882a593Smuzhiyun 			       -(u64)reset_value[i]);
653*4882a593Smuzhiyun 			CCCR_CLEAR_OVF(low);
654*4882a593Smuzhiyun 			wrmsr(p4_counters[real].cccr_address, low, high);
655*4882a593Smuzhiyun 			wrmsrl(p4_counters[real].counter_address,
656*4882a593Smuzhiyun 			       -(u64)reset_value[i]);
657*4882a593Smuzhiyun 		}
658*4882a593Smuzhiyun 	}
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun 	/* P4 quirk: you have to re-unmask the apic vector */
661*4882a593Smuzhiyun 	apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
662*4882a593Smuzhiyun 
663*4882a593Smuzhiyun 	/* See op_model_ppro.c */
664*4882a593Smuzhiyun 	return 1;
665*4882a593Smuzhiyun }
666*4882a593Smuzhiyun 
667*4882a593Smuzhiyun 
p4_start(struct op_msrs const * const msrs)668*4882a593Smuzhiyun static void p4_start(struct op_msrs const * const msrs)
669*4882a593Smuzhiyun {
670*4882a593Smuzhiyun 	unsigned int low, high, stag;
671*4882a593Smuzhiyun 	int i;
672*4882a593Smuzhiyun 
673*4882a593Smuzhiyun 	stag = get_stagger();
674*4882a593Smuzhiyun 
675*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
676*4882a593Smuzhiyun 		if (!reset_value[i])
677*4882a593Smuzhiyun 			continue;
678*4882a593Smuzhiyun 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
679*4882a593Smuzhiyun 		CCCR_SET_ENABLE(low);
680*4882a593Smuzhiyun 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
681*4882a593Smuzhiyun 	}
682*4882a593Smuzhiyun }
683*4882a593Smuzhiyun 
684*4882a593Smuzhiyun 
p4_stop(struct op_msrs const * const msrs)685*4882a593Smuzhiyun static void p4_stop(struct op_msrs const * const msrs)
686*4882a593Smuzhiyun {
687*4882a593Smuzhiyun 	unsigned int low, high, stag;
688*4882a593Smuzhiyun 	int i;
689*4882a593Smuzhiyun 
690*4882a593Smuzhiyun 	stag = get_stagger();
691*4882a593Smuzhiyun 
692*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
693*4882a593Smuzhiyun 		if (!reset_value[i])
694*4882a593Smuzhiyun 			continue;
695*4882a593Smuzhiyun 		rdmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
696*4882a593Smuzhiyun 		CCCR_SET_DISABLE(low);
697*4882a593Smuzhiyun 		wrmsr(p4_counters[VIRT_CTR(stag, i)].cccr_address, low, high);
698*4882a593Smuzhiyun 	}
699*4882a593Smuzhiyun }
700*4882a593Smuzhiyun 
#ifdef CONFIG_SMP
/*
 * Model description using the HT2 register counts.  Only built on SMP
 * kernels; it shares all callbacks with op_p4_spec.
 */
struct op_x86_model_spec op_p4_ht2_spec = {
	.num_counters		= NUM_COUNTERS_HT2,
	.num_controls		= NUM_CONTROLS_HT2,
	.fill_in_addresses	= p4_fill_in_addresses,
	.setup_ctrs		= p4_setup_ctrs,
	.check_ctrs		= p4_check_ctrs,
	.start			= p4_start,
	.stop			= p4_stop,
	.shutdown		= p4_shutdown,
};
#endif
713*4882a593Smuzhiyun 
714*4882a593Smuzhiyun struct op_x86_model_spec op_p4_spec = {
715*4882a593Smuzhiyun 	.num_counters		= NUM_COUNTERS_NON_HT,
716*4882a593Smuzhiyun 	.num_controls		= NUM_CONTROLS_NON_HT,
717*4882a593Smuzhiyun 	.fill_in_addresses	= &p4_fill_in_addresses,
718*4882a593Smuzhiyun 	.setup_ctrs		= &p4_setup_ctrs,
719*4882a593Smuzhiyun 	.check_ctrs		= &p4_check_ctrs,
720*4882a593Smuzhiyun 	.start			= &p4_start,
721*4882a593Smuzhiyun 	.stop			= &p4_stop,
722*4882a593Smuzhiyun 	.shutdown		= &p4_shutdown
723*4882a593Smuzhiyun };
724