/**
 * @file arch/alpha/oprofile/op_model_ev67.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author Richard Henderson <rth@twiddle.net>
 * @author Falk Hueffner <falk@debian.org>
 */
10*4882a593Smuzhiyun
11*4882a593Smuzhiyun #include <linux/oprofile.h>
12*4882a593Smuzhiyun #include <linux/smp.h>
13*4882a593Smuzhiyun #include <asm/ptrace.h>
14*4882a593Smuzhiyun
15*4882a593Smuzhiyun #include "op_impl.h"
16*4882a593Smuzhiyun
17*4882a593Smuzhiyun
18*4882a593Smuzhiyun /* Compute all of the registers in preparation for enabling profiling. */
19*4882a593Smuzhiyun
20*4882a593Smuzhiyun static void
ev67_reg_setup(struct op_register_config * reg,struct op_counter_config * ctr,struct op_system_config * sys)21*4882a593Smuzhiyun ev67_reg_setup(struct op_register_config *reg,
22*4882a593Smuzhiyun struct op_counter_config *ctr,
23*4882a593Smuzhiyun struct op_system_config *sys)
24*4882a593Smuzhiyun {
25*4882a593Smuzhiyun unsigned long ctl, reset, need_reset, i;
26*4882a593Smuzhiyun
27*4882a593Smuzhiyun /* Select desired events. */
28*4882a593Smuzhiyun ctl = 1UL << 4; /* Enable ProfileMe mode. */
29*4882a593Smuzhiyun
30*4882a593Smuzhiyun /* The event numbers are chosen so we can use them directly if
31*4882a593Smuzhiyun PCTR1 is enabled. */
32*4882a593Smuzhiyun if (ctr[1].enabled) {
33*4882a593Smuzhiyun ctl |= (ctr[1].event & 3) << 2;
34*4882a593Smuzhiyun } else {
35*4882a593Smuzhiyun if (ctr[0].event == 0) /* cycles */
36*4882a593Smuzhiyun ctl |= 1UL << 2;
37*4882a593Smuzhiyun }
38*4882a593Smuzhiyun reg->mux_select = ctl;
39*4882a593Smuzhiyun
40*4882a593Smuzhiyun /* Select logging options. */
41*4882a593Smuzhiyun /* ??? Need to come up with some mechanism to trace only
42*4882a593Smuzhiyun selected processes. EV67 does not have a mechanism to
43*4882a593Smuzhiyun select kernel or user mode only. For now, enable always. */
44*4882a593Smuzhiyun reg->proc_mode = 0;
45*4882a593Smuzhiyun
46*4882a593Smuzhiyun /* EV67 cannot change the width of the counters as with the
47*4882a593Smuzhiyun other implementations. But fortunately, we can write to
48*4882a593Smuzhiyun the counters and set the value such that it will overflow
49*4882a593Smuzhiyun at the right time. */
50*4882a593Smuzhiyun reset = need_reset = 0;
51*4882a593Smuzhiyun for (i = 0; i < 2; ++i) {
52*4882a593Smuzhiyun unsigned long count = ctr[i].count;
53*4882a593Smuzhiyun if (!ctr[i].enabled)
54*4882a593Smuzhiyun continue;
55*4882a593Smuzhiyun
56*4882a593Smuzhiyun if (count > 0x100000)
57*4882a593Smuzhiyun count = 0x100000;
58*4882a593Smuzhiyun ctr[i].count = count;
59*4882a593Smuzhiyun reset |= (0x100000 - count) << (i ? 6 : 28);
60*4882a593Smuzhiyun if (count != 0x100000)
61*4882a593Smuzhiyun need_reset |= 1 << i;
62*4882a593Smuzhiyun }
63*4882a593Smuzhiyun reg->reset_values = reset;
64*4882a593Smuzhiyun reg->need_reset = need_reset;
65*4882a593Smuzhiyun }
66*4882a593Smuzhiyun
67*4882a593Smuzhiyun /* Program all of the registers in preparation for enabling profiling. */
68*4882a593Smuzhiyun
69*4882a593Smuzhiyun static void
ev67_cpu_setup(void * x)70*4882a593Smuzhiyun ev67_cpu_setup (void *x)
71*4882a593Smuzhiyun {
72*4882a593Smuzhiyun struct op_register_config *reg = x;
73*4882a593Smuzhiyun
74*4882a593Smuzhiyun wrperfmon(2, reg->mux_select);
75*4882a593Smuzhiyun wrperfmon(3, reg->proc_mode);
76*4882a593Smuzhiyun wrperfmon(6, reg->reset_values | 3);
77*4882a593Smuzhiyun }
78*4882a593Smuzhiyun
79*4882a593Smuzhiyun /* CTR is a counter for which the user has requested an interrupt count
80*4882a593Smuzhiyun in between one of the widths selectable in hardware. Reset the count
81*4882a593Smuzhiyun for CTR to the value stored in REG->RESET_VALUES. */
82*4882a593Smuzhiyun
83*4882a593Smuzhiyun static void
ev67_reset_ctr(struct op_register_config * reg,unsigned long ctr)84*4882a593Smuzhiyun ev67_reset_ctr(struct op_register_config *reg, unsigned long ctr)
85*4882a593Smuzhiyun {
86*4882a593Smuzhiyun wrperfmon(6, reg->reset_values | (1 << ctr));
87*4882a593Smuzhiyun }
88*4882a593Smuzhiyun
/* ProfileMe conditions that are exposed as counters.  The following
   conditions can be detected as well, but it seems unlikely that anybody
   is interested in counting them:
    * Reset
    * MT_FPCR (write to floating point control register)
    * Arithmetic trap
    * Dstream Fault
    * Machine Check (ECC fault, etc.)
    * OPCDEC (illegal opcode)
    * Floating point disabled
    * Differentiate between DTB single/double misses and 3 or 4 level
      page tables
    * Istream access violation
    * Interrupt
    * Icache Parity Error.
    * Instruction killed (nop, trapb)

   Unfortunately, there seems to be no way to detect Dcache and Bcache
   misses; the latter could be approximated by making the counter
   count Bcache misses, but that is not precise.

   The whole set is modelled as 20 counters:
    * PCTR0
    * PCTR1
    * 9 ProfileMe events, induced by PCTR0
    * 9 ProfileMe events, induced by PCTR1
 */

enum profileme_counters {
	/* Stalled for at least one cycle between the fetch and map stages */
	PM_STALLED	= 0,
	/* Conditional branch taken */
	PM_TAKEN	= 1,
	/* Branch caused mispredict trap */
	PM_MISPREDICT	= 2,
	/* ITB miss */
	PM_ITB_MISS	= 3,
	/* DTB miss */
	PM_DTB_MISS	= 4,
	/* Replay trap */
	PM_REPLAY	= 5,
	/* Load-store order trap */
	PM_LOAD_STORE	= 6,
	/* Icache miss */
	PM_ICACHE_MISS	= 7,
	/* Unaligned Load/Store */
	PM_UNALIGNED	= 8,
	/* Number of ProfileMe events per hardware counter */
	PM_NUM_COUNTERS	= 9
};
130*4882a593Smuzhiyun
131*4882a593Smuzhiyun static inline void
op_add_pm(unsigned long pc,int kern,unsigned long counter,struct op_counter_config * ctr,unsigned long event)132*4882a593Smuzhiyun op_add_pm(unsigned long pc, int kern, unsigned long counter,
133*4882a593Smuzhiyun struct op_counter_config *ctr, unsigned long event)
134*4882a593Smuzhiyun {
135*4882a593Smuzhiyun unsigned long fake_counter = 2 + event;
136*4882a593Smuzhiyun if (counter == 1)
137*4882a593Smuzhiyun fake_counter += PM_NUM_COUNTERS;
138*4882a593Smuzhiyun if (ctr[fake_counter].enabled)
139*4882a593Smuzhiyun oprofile_add_pc(pc, kern, fake_counter);
140*4882a593Smuzhiyun }
141*4882a593Smuzhiyun
142*4882a593Smuzhiyun static void
ev67_handle_interrupt(unsigned long which,struct pt_regs * regs,struct op_counter_config * ctr)143*4882a593Smuzhiyun ev67_handle_interrupt(unsigned long which, struct pt_regs *regs,
144*4882a593Smuzhiyun struct op_counter_config *ctr)
145*4882a593Smuzhiyun {
146*4882a593Smuzhiyun unsigned long pmpc, pctr_ctl;
147*4882a593Smuzhiyun int kern = !user_mode(regs);
148*4882a593Smuzhiyun int mispredict = 0;
149*4882a593Smuzhiyun union {
150*4882a593Smuzhiyun unsigned long v;
151*4882a593Smuzhiyun struct {
152*4882a593Smuzhiyun unsigned reserved: 30; /* 0-29 */
153*4882a593Smuzhiyun unsigned overcount: 3; /* 30-32 */
154*4882a593Smuzhiyun unsigned icache_miss: 1; /* 33 */
155*4882a593Smuzhiyun unsigned trap_type: 4; /* 34-37 */
156*4882a593Smuzhiyun unsigned load_store: 1; /* 38 */
157*4882a593Smuzhiyun unsigned trap: 1; /* 39 */
158*4882a593Smuzhiyun unsigned mispredict: 1; /* 40 */
159*4882a593Smuzhiyun } fields;
160*4882a593Smuzhiyun } i_stat;
161*4882a593Smuzhiyun
162*4882a593Smuzhiyun enum trap_types {
163*4882a593Smuzhiyun TRAP_REPLAY,
164*4882a593Smuzhiyun TRAP_INVALID0,
165*4882a593Smuzhiyun TRAP_DTB_DOUBLE_MISS_3,
166*4882a593Smuzhiyun TRAP_DTB_DOUBLE_MISS_4,
167*4882a593Smuzhiyun TRAP_FP_DISABLED,
168*4882a593Smuzhiyun TRAP_UNALIGNED,
169*4882a593Smuzhiyun TRAP_DTB_SINGLE_MISS,
170*4882a593Smuzhiyun TRAP_DSTREAM_FAULT,
171*4882a593Smuzhiyun TRAP_OPCDEC,
172*4882a593Smuzhiyun TRAP_INVALID1,
173*4882a593Smuzhiyun TRAP_MACHINE_CHECK,
174*4882a593Smuzhiyun TRAP_INVALID2,
175*4882a593Smuzhiyun TRAP_ARITHMETIC,
176*4882a593Smuzhiyun TRAP_INVALID3,
177*4882a593Smuzhiyun TRAP_MT_FPCR,
178*4882a593Smuzhiyun TRAP_RESET
179*4882a593Smuzhiyun };
180*4882a593Smuzhiyun
181*4882a593Smuzhiyun pmpc = wrperfmon(9, 0);
182*4882a593Smuzhiyun /* ??? Don't know how to handle physical-mode PALcode address. */
183*4882a593Smuzhiyun if (pmpc & 1)
184*4882a593Smuzhiyun return;
185*4882a593Smuzhiyun pmpc &= ~2; /* clear reserved bit */
186*4882a593Smuzhiyun
187*4882a593Smuzhiyun i_stat.v = wrperfmon(8, 0);
188*4882a593Smuzhiyun if (i_stat.fields.trap) {
189*4882a593Smuzhiyun switch (i_stat.fields.trap_type) {
190*4882a593Smuzhiyun case TRAP_INVALID1:
191*4882a593Smuzhiyun case TRAP_INVALID2:
192*4882a593Smuzhiyun case TRAP_INVALID3:
193*4882a593Smuzhiyun /* Pipeline redirection occurred. PMPC points
194*4882a593Smuzhiyun to PALcode. Recognize ITB miss by PALcode
195*4882a593Smuzhiyun offset address, and get actual PC from
196*4882a593Smuzhiyun EXC_ADDR. */
197*4882a593Smuzhiyun oprofile_add_pc(regs->pc, kern, which);
198*4882a593Smuzhiyun if ((pmpc & ((1 << 15) - 1)) == 581)
199*4882a593Smuzhiyun op_add_pm(regs->pc, kern, which,
200*4882a593Smuzhiyun ctr, PM_ITB_MISS);
201*4882a593Smuzhiyun /* Most other bit and counter values will be
202*4882a593Smuzhiyun those for the first instruction in the
203*4882a593Smuzhiyun fault handler, so we're done. */
204*4882a593Smuzhiyun return;
205*4882a593Smuzhiyun case TRAP_REPLAY:
206*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr,
207*4882a593Smuzhiyun (i_stat.fields.load_store
208*4882a593Smuzhiyun ? PM_LOAD_STORE : PM_REPLAY));
209*4882a593Smuzhiyun break;
210*4882a593Smuzhiyun case TRAP_DTB_DOUBLE_MISS_3:
211*4882a593Smuzhiyun case TRAP_DTB_DOUBLE_MISS_4:
212*4882a593Smuzhiyun case TRAP_DTB_SINGLE_MISS:
213*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr, PM_DTB_MISS);
214*4882a593Smuzhiyun break;
215*4882a593Smuzhiyun case TRAP_UNALIGNED:
216*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr, PM_UNALIGNED);
217*4882a593Smuzhiyun break;
218*4882a593Smuzhiyun case TRAP_INVALID0:
219*4882a593Smuzhiyun case TRAP_FP_DISABLED:
220*4882a593Smuzhiyun case TRAP_DSTREAM_FAULT:
221*4882a593Smuzhiyun case TRAP_OPCDEC:
222*4882a593Smuzhiyun case TRAP_MACHINE_CHECK:
223*4882a593Smuzhiyun case TRAP_ARITHMETIC:
224*4882a593Smuzhiyun case TRAP_MT_FPCR:
225*4882a593Smuzhiyun case TRAP_RESET:
226*4882a593Smuzhiyun break;
227*4882a593Smuzhiyun }
228*4882a593Smuzhiyun
229*4882a593Smuzhiyun /* ??? JSR/JMP/RET/COR or HW_JSR/HW_JMP/HW_RET/HW_COR
230*4882a593Smuzhiyun mispredicts do not set this bit but can be
231*4882a593Smuzhiyun recognized by the presence of one of these
232*4882a593Smuzhiyun instructions at the PMPC location with bit 39
233*4882a593Smuzhiyun set. */
234*4882a593Smuzhiyun if (i_stat.fields.mispredict) {
235*4882a593Smuzhiyun mispredict = 1;
236*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr, PM_MISPREDICT);
237*4882a593Smuzhiyun }
238*4882a593Smuzhiyun }
239*4882a593Smuzhiyun
240*4882a593Smuzhiyun oprofile_add_pc(pmpc, kern, which);
241*4882a593Smuzhiyun
242*4882a593Smuzhiyun pctr_ctl = wrperfmon(5, 0);
243*4882a593Smuzhiyun if (pctr_ctl & (1UL << 27))
244*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr, PM_STALLED);
245*4882a593Smuzhiyun
246*4882a593Smuzhiyun /* Unfortunately, TAK is undefined on mispredicted branches.
247*4882a593Smuzhiyun ??? It is also undefined for non-cbranch insns, should
248*4882a593Smuzhiyun check that. */
249*4882a593Smuzhiyun if (!mispredict && pctr_ctl & (1UL << 0))
250*4882a593Smuzhiyun op_add_pm(pmpc, kern, which, ctr, PM_TAKEN);
251*4882a593Smuzhiyun }
252*4882a593Smuzhiyun
253*4882a593Smuzhiyun struct op_axp_model op_model_ev67 = {
254*4882a593Smuzhiyun .reg_setup = ev67_reg_setup,
255*4882a593Smuzhiyun .cpu_setup = ev67_cpu_setup,
256*4882a593Smuzhiyun .reset_ctr = ev67_reset_ctr,
257*4882a593Smuzhiyun .handle_interrupt = ev67_handle_interrupt,
258*4882a593Smuzhiyun .cpu_type = "alpha/ev67",
259*4882a593Smuzhiyun .num_counters = 20,
260*4882a593Smuzhiyun .can_set_proc_mode = 0,
261*4882a593Smuzhiyun };
262