xref: /OK3568_Linux_fs/kernel/arch/alpha/oprofile/op_model_ev67.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun /**
2*4882a593Smuzhiyun  * @file arch/alpha/oprofile/op_model_ev67.c
3*4882a593Smuzhiyun  *
4*4882a593Smuzhiyun  * @remark Copyright 2002 OProfile authors
5*4882a593Smuzhiyun  * @remark Read the file COPYING
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * @author Richard Henderson <rth@twiddle.net>
8*4882a593Smuzhiyun  * @author Falk Hueffner <falk@debian.org>
9*4882a593Smuzhiyun  */
10*4882a593Smuzhiyun 
11*4882a593Smuzhiyun #include <linux/oprofile.h>
12*4882a593Smuzhiyun #include <linux/smp.h>
13*4882a593Smuzhiyun #include <asm/ptrace.h>
14*4882a593Smuzhiyun 
15*4882a593Smuzhiyun #include "op_impl.h"
16*4882a593Smuzhiyun 
17*4882a593Smuzhiyun 
18*4882a593Smuzhiyun /* Compute all of the registers in preparation for enabling profiling.  */
19*4882a593Smuzhiyun 
20*4882a593Smuzhiyun static void
ev67_reg_setup(struct op_register_config * reg,struct op_counter_config * ctr,struct op_system_config * sys)21*4882a593Smuzhiyun ev67_reg_setup(struct op_register_config *reg,
22*4882a593Smuzhiyun 	       struct op_counter_config *ctr,
23*4882a593Smuzhiyun 	       struct op_system_config *sys)
24*4882a593Smuzhiyun {
25*4882a593Smuzhiyun 	unsigned long ctl, reset, need_reset, i;
26*4882a593Smuzhiyun 
27*4882a593Smuzhiyun 	/* Select desired events.  */
28*4882a593Smuzhiyun 	ctl = 1UL << 4;		/* Enable ProfileMe mode. */
29*4882a593Smuzhiyun 
30*4882a593Smuzhiyun 	/* The event numbers are chosen so we can use them directly if
31*4882a593Smuzhiyun 	   PCTR1 is enabled.  */
32*4882a593Smuzhiyun 	if (ctr[1].enabled) {
33*4882a593Smuzhiyun 		ctl |= (ctr[1].event & 3) << 2;
34*4882a593Smuzhiyun 	} else {
35*4882a593Smuzhiyun 		if (ctr[0].event == 0) /* cycles */
36*4882a593Smuzhiyun 			ctl |= 1UL << 2;
37*4882a593Smuzhiyun 	}
38*4882a593Smuzhiyun 	reg->mux_select = ctl;
39*4882a593Smuzhiyun 
40*4882a593Smuzhiyun 	/* Select logging options.  */
41*4882a593Smuzhiyun 	/* ??? Need to come up with some mechanism to trace only
42*4882a593Smuzhiyun 	   selected processes.  EV67 does not have a mechanism to
43*4882a593Smuzhiyun 	   select kernel or user mode only.  For now, enable always.  */
44*4882a593Smuzhiyun 	reg->proc_mode = 0;
45*4882a593Smuzhiyun 
46*4882a593Smuzhiyun 	/* EV67 cannot change the width of the counters as with the
47*4882a593Smuzhiyun 	   other implementations.  But fortunately, we can write to
48*4882a593Smuzhiyun 	   the counters and set the value such that it will overflow
49*4882a593Smuzhiyun 	   at the right time.  */
50*4882a593Smuzhiyun 	reset = need_reset = 0;
51*4882a593Smuzhiyun 	for (i = 0; i < 2; ++i) {
52*4882a593Smuzhiyun 		unsigned long count = ctr[i].count;
53*4882a593Smuzhiyun 		if (!ctr[i].enabled)
54*4882a593Smuzhiyun 			continue;
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun 		if (count > 0x100000)
57*4882a593Smuzhiyun 			count = 0x100000;
58*4882a593Smuzhiyun 		ctr[i].count = count;
59*4882a593Smuzhiyun 		reset |= (0x100000 - count) << (i ? 6 : 28);
60*4882a593Smuzhiyun 		if (count != 0x100000)
61*4882a593Smuzhiyun 			need_reset |= 1 << i;
62*4882a593Smuzhiyun 	}
63*4882a593Smuzhiyun 	reg->reset_values = reset;
64*4882a593Smuzhiyun 	reg->need_reset = need_reset;
65*4882a593Smuzhiyun }
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun /* Program all of the registers in preparation for enabling profiling.  */
68*4882a593Smuzhiyun 
69*4882a593Smuzhiyun static void
ev67_cpu_setup(void * x)70*4882a593Smuzhiyun ev67_cpu_setup (void *x)
71*4882a593Smuzhiyun {
72*4882a593Smuzhiyun 	struct op_register_config *reg = x;
73*4882a593Smuzhiyun 
74*4882a593Smuzhiyun 	wrperfmon(2, reg->mux_select);
75*4882a593Smuzhiyun 	wrperfmon(3, reg->proc_mode);
76*4882a593Smuzhiyun 	wrperfmon(6, reg->reset_values | 3);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun /* CTR is a counter for which the user has requested an interrupt count
80*4882a593Smuzhiyun    in between one of the widths selectable in hardware.  Reset the count
81*4882a593Smuzhiyun    for CTR to the value stored in REG->RESET_VALUES.  */
82*4882a593Smuzhiyun 
83*4882a593Smuzhiyun static void
ev67_reset_ctr(struct op_register_config * reg,unsigned long ctr)84*4882a593Smuzhiyun ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun 	wrperfmon(6, reg->reset_values | (1 << ctr));
87*4882a593Smuzhiyun }
88*4882a593Smuzhiyun 
89*4882a593Smuzhiyun /* ProfileMe conditions which will show up as counters. We can also
90*4882a593Smuzhiyun    detect the following, but it seems unlikely that anybody is
91*4882a593Smuzhiyun    interested in counting them:
92*4882a593Smuzhiyun     * Reset
93*4882a593Smuzhiyun     * MT_FPCR (write to floating point control register)
94*4882a593Smuzhiyun     * Arithmetic trap
95*4882a593Smuzhiyun     * Dstream Fault
96*4882a593Smuzhiyun     * Machine Check (ECC fault, etc.)
97*4882a593Smuzhiyun     * OPCDEC (illegal opcode)
98*4882a593Smuzhiyun     * Floating point disabled
99*4882a593Smuzhiyun     * Differentiate between DTB single/double misses and 3 or 4 level
100*4882a593Smuzhiyun       page tables
101*4882a593Smuzhiyun     * Istream access violation
102*4882a593Smuzhiyun     * Interrupt
103*4882a593Smuzhiyun     * Icache Parity Error.
104*4882a593Smuzhiyun     * Instruction killed (nop, trapb)
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun    Unfortunately, there seems to be no way to detect Dcache and Bcache
107*4882a593Smuzhiyun    misses; the latter could be approximated by making the counter
108*4882a593Smuzhiyun    count Bcache misses, but that is not precise.
109*4882a593Smuzhiyun 
110*4882a593Smuzhiyun    We model this as 20 counters:
111*4882a593Smuzhiyun     * PCTR0
112*4882a593Smuzhiyun     * PCTR1
113*4882a593Smuzhiyun     * 9 ProfileMe events, induced by PCTR0
114*4882a593Smuzhiyun     * 9 ProfileMe events, induced by PCTR1
115*4882a593Smuzhiyun */
116*4882a593Smuzhiyun 
/* Indices of the ProfileMe events that are exposed as pseudo-counters.
   The numbering starts at zero and is contiguous; PM_NUM_COUNTERS is
   the number of events listed before it.  */
enum profileme_counters {
	PM_STALLED	= 0,	/* Stalled for at least one cycle
				   between the fetch and map stages  */
	PM_TAKEN	= 1,	/* Conditional branch taken */
	PM_MISPREDICT	= 2,	/* Branch caused mispredict trap */
	PM_ITB_MISS	= 3,	/* ITB miss */
	PM_DTB_MISS	= 4,	/* DTB miss */
	PM_REPLAY	= 5,	/* Replay trap */
	PM_LOAD_STORE	= 6,	/* Load-store order trap */
	PM_ICACHE_MISS	= 7,	/* Icache miss */
	PM_UNALIGNED	= 8,	/* Unaligned Load/Store */
	PM_NUM_COUNTERS	= 9
};
130*4882a593Smuzhiyun 
131*4882a593Smuzhiyun static inline void
op_add_pm(unsigned long pc,int kern,unsigned long counter,struct op_counter_config * ctr,unsigned long event)132*4882a593Smuzhiyun op_add_pm(unsigned long pc, int kern, unsigned long counter,
133*4882a593Smuzhiyun 	  struct op_counter_config *ctr, unsigned long event)
134*4882a593Smuzhiyun {
135*4882a593Smuzhiyun 	unsigned long fake_counter = 2 + event;
136*4882a593Smuzhiyun 	if (counter == 1)
137*4882a593Smuzhiyun 		fake_counter += PM_NUM_COUNTERS;
138*4882a593Smuzhiyun 	if (ctr[fake_counter].enabled)
139*4882a593Smuzhiyun 		oprofile_add_pc(pc, kern, fake_counter);
140*4882a593Smuzhiyun }
141*4882a593Smuzhiyun 
142*4882a593Smuzhiyun static void
ev67_handle_interrupt(unsigned long which,struct pt_regs * regs,struct op_counter_config * ctr)143*4882a593Smuzhiyun ev67_handle_interrupt(unsigned long which, struct pt_regs *regs,
144*4882a593Smuzhiyun 		      struct op_counter_config *ctr)
145*4882a593Smuzhiyun {
146*4882a593Smuzhiyun 	unsigned long pmpc, pctr_ctl;
147*4882a593Smuzhiyun 	int kern = !user_mode(regs);
148*4882a593Smuzhiyun 	int mispredict = 0;
149*4882a593Smuzhiyun 	union {
150*4882a593Smuzhiyun 		unsigned long v;
151*4882a593Smuzhiyun 		struct {
152*4882a593Smuzhiyun 			unsigned reserved:	30; /*  0-29 */
153*4882a593Smuzhiyun 			unsigned overcount:	 3; /* 30-32 */
154*4882a593Smuzhiyun 			unsigned icache_miss:	 1; /*    33 */
155*4882a593Smuzhiyun 			unsigned trap_type:	 4; /* 34-37 */
156*4882a593Smuzhiyun 			unsigned load_store:	 1; /*    38 */
157*4882a593Smuzhiyun 			unsigned trap:		 1; /*    39 */
158*4882a593Smuzhiyun 			unsigned mispredict:	 1; /*    40 */
159*4882a593Smuzhiyun 		} fields;
160*4882a593Smuzhiyun 	} i_stat;
161*4882a593Smuzhiyun 
162*4882a593Smuzhiyun 	enum trap_types {
163*4882a593Smuzhiyun 		TRAP_REPLAY,
164*4882a593Smuzhiyun 		TRAP_INVALID0,
165*4882a593Smuzhiyun 		TRAP_DTB_DOUBLE_MISS_3,
166*4882a593Smuzhiyun 		TRAP_DTB_DOUBLE_MISS_4,
167*4882a593Smuzhiyun 		TRAP_FP_DISABLED,
168*4882a593Smuzhiyun 		TRAP_UNALIGNED,
169*4882a593Smuzhiyun 		TRAP_DTB_SINGLE_MISS,
170*4882a593Smuzhiyun 		TRAP_DSTREAM_FAULT,
171*4882a593Smuzhiyun 		TRAP_OPCDEC,
172*4882a593Smuzhiyun 		TRAP_INVALID1,
173*4882a593Smuzhiyun 		TRAP_MACHINE_CHECK,
174*4882a593Smuzhiyun 		TRAP_INVALID2,
175*4882a593Smuzhiyun 		TRAP_ARITHMETIC,
176*4882a593Smuzhiyun 		TRAP_INVALID3,
177*4882a593Smuzhiyun 		TRAP_MT_FPCR,
178*4882a593Smuzhiyun 		TRAP_RESET
179*4882a593Smuzhiyun 	};
180*4882a593Smuzhiyun 
181*4882a593Smuzhiyun 	pmpc = wrperfmon(9, 0);
182*4882a593Smuzhiyun 	/* ??? Don't know how to handle physical-mode PALcode address.  */
183*4882a593Smuzhiyun 	if (pmpc & 1)
184*4882a593Smuzhiyun 		return;
185*4882a593Smuzhiyun 	pmpc &= ~2;		/* clear reserved bit */
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun 	i_stat.v = wrperfmon(8, 0);
188*4882a593Smuzhiyun 	if (i_stat.fields.trap) {
189*4882a593Smuzhiyun 		switch (i_stat.fields.trap_type) {
190*4882a593Smuzhiyun 		case TRAP_INVALID1:
191*4882a593Smuzhiyun 		case TRAP_INVALID2:
192*4882a593Smuzhiyun 		case TRAP_INVALID3:
193*4882a593Smuzhiyun 			/* Pipeline redirection occurred. PMPC points
194*4882a593Smuzhiyun 			   to PALcode. Recognize ITB miss by PALcode
195*4882a593Smuzhiyun 			   offset address, and get actual PC from
196*4882a593Smuzhiyun 			   EXC_ADDR.  */
197*4882a593Smuzhiyun 			oprofile_add_pc(regs->pc, kern, which);
198*4882a593Smuzhiyun 			if ((pmpc & ((1 << 15) - 1)) ==  581)
199*4882a593Smuzhiyun 				op_add_pm(regs->pc, kern, which,
200*4882a593Smuzhiyun 					  ctr, PM_ITB_MISS);
201*4882a593Smuzhiyun 			/* Most other bit and counter values will be
202*4882a593Smuzhiyun 			   those for the first instruction in the
203*4882a593Smuzhiyun 			   fault handler, so we're done.  */
204*4882a593Smuzhiyun 			return;
205*4882a593Smuzhiyun 		case TRAP_REPLAY:
206*4882a593Smuzhiyun 			op_add_pm(pmpc, kern, which, ctr,
207*4882a593Smuzhiyun 				  (i_stat.fields.load_store
208*4882a593Smuzhiyun 				   ? PM_LOAD_STORE : PM_REPLAY));
209*4882a593Smuzhiyun 			break;
210*4882a593Smuzhiyun 		case TRAP_DTB_DOUBLE_MISS_3:
211*4882a593Smuzhiyun 		case TRAP_DTB_DOUBLE_MISS_4:
212*4882a593Smuzhiyun 		case TRAP_DTB_SINGLE_MISS:
213*4882a593Smuzhiyun 			op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS);
214*4882a593Smuzhiyun 			break;
215*4882a593Smuzhiyun 		case TRAP_UNALIGNED:
216*4882a593Smuzhiyun 			op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED);
217*4882a593Smuzhiyun 			break;
218*4882a593Smuzhiyun 		case TRAP_INVALID0:
219*4882a593Smuzhiyun 		case TRAP_FP_DISABLED:
220*4882a593Smuzhiyun 		case TRAP_DSTREAM_FAULT:
221*4882a593Smuzhiyun 		case TRAP_OPCDEC:
222*4882a593Smuzhiyun 		case TRAP_MACHINE_CHECK:
223*4882a593Smuzhiyun 		case TRAP_ARITHMETIC:
224*4882a593Smuzhiyun 		case TRAP_MT_FPCR:
225*4882a593Smuzhiyun 		case TRAP_RESET:
226*4882a593Smuzhiyun 			break;
227*4882a593Smuzhiyun 		}
228*4882a593Smuzhiyun 
229*4882a593Smuzhiyun 		/* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR
230*4882a593Smuzhiyun 		   mispredicts do not set this bit but can be
231*4882a593Smuzhiyun 		   recognized by the presence of one of these
232*4882a593Smuzhiyun 		   instructions at the PMPC location with bit 39
233*4882a593Smuzhiyun 		   set.  */
234*4882a593Smuzhiyun 		if (i_stat.fields.mispredict) {
235*4882a593Smuzhiyun 			mispredict = 1;
236*4882a593Smuzhiyun 			op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT);
237*4882a593Smuzhiyun 		}
238*4882a593Smuzhiyun 	}
239*4882a593Smuzhiyun 
240*4882a593Smuzhiyun 	oprofile_add_pc(pmpc, kern, which);
241*4882a593Smuzhiyun 
242*4882a593Smuzhiyun 	pctr_ctl = wrperfmon(5, 0);
243*4882a593Smuzhiyun 	if (pctr_ctl & (1UL << 27))
244*4882a593Smuzhiyun 		op_add_pm(pmpc, kern, which, ctr, PM_STALLED);
245*4882a593Smuzhiyun 
246*4882a593Smuzhiyun 	/* Unfortunately, TAK is undefined on mispredicted branches.
247*4882a593Smuzhiyun 	   ??? It is also undefined for non-cbranch insns, should
248*4882a593Smuzhiyun 	   check that.  */
249*4882a593Smuzhiyun 	if (!mispredict && pctr_ctl & (1UL << 0))
250*4882a593Smuzhiyun 		op_add_pm(pmpc, kern, which, ctr, PM_TAKEN);
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun struct op_axp_model op_model_ev67 = {
254*4882a593Smuzhiyun 	.reg_setup		= ev67_reg_setup,
255*4882a593Smuzhiyun 	.cpu_setup		= ev67_cpu_setup,
256*4882a593Smuzhiyun 	.reset_ctr		= ev67_reset_ctr,
257*4882a593Smuzhiyun 	.handle_interrupt	= ev67_handle_interrupt,
258*4882a593Smuzhiyun 	.cpu_type		= "alpha/ev67",
259*4882a593Smuzhiyun 	.num_counters		= 20,
260*4882a593Smuzhiyun 	.can_set_proc_mode	= 0,
261*4882a593Smuzhiyun };
262