xref: /OK3568_Linux_fs/kernel/arch/powerpc/oprofile/op_model_cell.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0-or-later
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Cell Broadband Engine OProfile Support
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * (C) Copyright IBM Corporation 2006
6*4882a593Smuzhiyun  *
7*4882a593Smuzhiyun  * Author: David Erb (djerb@us.ibm.com)
8*4882a593Smuzhiyun  * Modifications:
9*4882a593Smuzhiyun  *	   Carl Love <carll@us.ibm.com>
10*4882a593Smuzhiyun  *	   Maynard Johnson <maynardj@us.ibm.com>
11*4882a593Smuzhiyun  */
12*4882a593Smuzhiyun 
13*4882a593Smuzhiyun #include <linux/cpufreq.h>
14*4882a593Smuzhiyun #include <linux/delay.h>
15*4882a593Smuzhiyun #include <linux/jiffies.h>
16*4882a593Smuzhiyun #include <linux/kthread.h>
17*4882a593Smuzhiyun #include <linux/oprofile.h>
18*4882a593Smuzhiyun #include <linux/percpu.h>
19*4882a593Smuzhiyun #include <linux/smp.h>
20*4882a593Smuzhiyun #include <linux/spinlock.h>
21*4882a593Smuzhiyun #include <linux/timer.h>
22*4882a593Smuzhiyun #include <asm/cell-pmu.h>
23*4882a593Smuzhiyun #include <asm/cputable.h>
24*4882a593Smuzhiyun #include <asm/firmware.h>
25*4882a593Smuzhiyun #include <asm/io.h>
26*4882a593Smuzhiyun #include <asm/oprofile_impl.h>
27*4882a593Smuzhiyun #include <asm/processor.h>
28*4882a593Smuzhiyun #include <asm/prom.h>
29*4882a593Smuzhiyun #include <asm/ptrace.h>
30*4882a593Smuzhiyun #include <asm/reg.h>
31*4882a593Smuzhiyun #include <asm/rtas.h>
32*4882a593Smuzhiyun #include <asm/cell-regs.h>
33*4882a593Smuzhiyun 
34*4882a593Smuzhiyun #include "../platforms/cell/interrupt.h"
35*4882a593Smuzhiyun #include "cell/pr_util.h"
36*4882a593Smuzhiyun 
37*4882a593Smuzhiyun #define PPU_PROFILING            0
38*4882a593Smuzhiyun #define SPU_PROFILING_CYCLES     1
39*4882a593Smuzhiyun #define SPU_PROFILING_EVENTS     2
40*4882a593Smuzhiyun 
41*4882a593Smuzhiyun #define SPU_EVENT_NUM_START      4100
42*4882a593Smuzhiyun #define SPU_EVENT_NUM_STOP       4399
43*4882a593Smuzhiyun #define SPU_PROFILE_EVENT_ADDR          4363  /* spu, address trace, decimal */
44*4882a593Smuzhiyun #define SPU_PROFILE_EVENT_ADDR_MASK_A   0x146 /* sub unit set to zero */
45*4882a593Smuzhiyun #define SPU_PROFILE_EVENT_ADDR_MASK_B   0x186 /* sub unit set to zero */
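/*
 * Note (derived from the code below): SPU_PROFILE_EVENT_ADDR is the event
 * number for the SPU address trace.  The bus word fields of MASK_A (0x4)
 * and MASK_B (0x8), extracted with GET_BUS_WORD() further down, select the
 * two debug bus words used to route the SPU program counter;
 * cell_reg_setup_spu_events() copies them into pm_signal[1] and pm_signal[2].
 */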
46*4882a593Smuzhiyun 
47*4882a593Smuzhiyun #define NUM_SPUS_PER_NODE    8
48*4882a593Smuzhiyun #define SPU_CYCLES_EVENT_NUM 2	/*  event number for SPU_CYCLES */
49*4882a593Smuzhiyun 
50*4882a593Smuzhiyun #define PPU_CYCLES_EVENT_NUM 1	/*  event number for CYCLES */
51*4882a593Smuzhiyun #define PPU_CYCLES_GRP_NUM   1	/* special group number for identifying
52*4882a593Smuzhiyun 				 * PPU_CYCLES event
53*4882a593Smuzhiyun 				 */
54*4882a593Smuzhiyun #define CBE_COUNT_ALL_CYCLES 0x42800000 /* PPU cycle event specifier */
55*4882a593Smuzhiyun 
56*4882a593Smuzhiyun #define NUM_THREADS 2         /* number of physical threads in
57*4882a593Smuzhiyun 			       * physical processor
58*4882a593Smuzhiyun 			       */
59*4882a593Smuzhiyun #define NUM_DEBUG_BUS_WORDS 4
60*4882a593Smuzhiyun #define NUM_INPUT_BUS_WORDS 2
61*4882a593Smuzhiyun 
62*4882a593Smuzhiyun #define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun /* The minimum HW interval timer setting to send a value to the trace buffer
65*4882a593Smuzhiyun  * is 10 cycles.  To configure the counter to send a value every N cycles,
66*4882a593Smuzhiyun  * set the counter to 2^32 - 1 - N.
67*4882a593Smuzhiyun  */
68*4882a593Smuzhiyun #define NUM_INTERVAL_CYC  (0xFFFFFFFF - 10)
69*4882a593Smuzhiyun 
70*4882a593Smuzhiyun /*
71*4882a593Smuzhiyun  * spu_cycle_reset is the number of cycles between samples.
72*4882a593Smuzhiyun  * This variable is used for SPU profiling and should ONLY be set
73*4882a593Smuzhiyun  * at the beginning of cell_reg_setup; otherwise, it's read-only.
74*4882a593Smuzhiyun  */
75*4882a593Smuzhiyun static unsigned int spu_cycle_reset;
76*4882a593Smuzhiyun static unsigned int profiling_mode;
77*4882a593Smuzhiyun static int spu_evnt_phys_spu_indx;
78*4882a593Smuzhiyun 
79*4882a593Smuzhiyun struct pmc_cntrl_data {
80*4882a593Smuzhiyun 	unsigned long vcntr;
81*4882a593Smuzhiyun 	unsigned long evnts;
82*4882a593Smuzhiyun 	unsigned long masks;
83*4882a593Smuzhiyun 	unsigned long enabled;
84*4882a593Smuzhiyun };
85*4882a593Smuzhiyun 
86*4882a593Smuzhiyun /*
87*4882a593Smuzhiyun  * ibm,cbe-perftools rtas parameters
88*4882a593Smuzhiyun  */
89*4882a593Smuzhiyun struct pm_signal {
90*4882a593Smuzhiyun 	u16 cpu;		/* Processor to modify */
91*4882a593Smuzhiyun 	u16 sub_unit;		/* hw subunit this applies to (if applicable)*/
92*4882a593Smuzhiyun 	short int signal_group; /* Signal Group to Enable/Disable */
93*4882a593Smuzhiyun 	u8 bus_word;		/* Enable/Disable on this Trace/Trigger/Event
94*4882a593Smuzhiyun 				 * Bus Word(s) (bitmask)
95*4882a593Smuzhiyun 				 */
96*4882a593Smuzhiyun 	u8 bit;			/* Trigger/Event bit (if applicable) */
97*4882a593Smuzhiyun };
98*4882a593Smuzhiyun 
99*4882a593Smuzhiyun /*
100*4882a593Smuzhiyun  * rtas call arguments
101*4882a593Smuzhiyun  */
102*4882a593Smuzhiyun enum {
103*4882a593Smuzhiyun 	SUBFUNC_RESET = 1,
104*4882a593Smuzhiyun 	SUBFUNC_ACTIVATE = 2,
105*4882a593Smuzhiyun 	SUBFUNC_DEACTIVATE = 3,
106*4882a593Smuzhiyun 
107*4882a593Smuzhiyun 	PASSTHRU_IGNORE = 0,
108*4882a593Smuzhiyun 	PASSTHRU_ENABLE = 1,
109*4882a593Smuzhiyun 	PASSTHRU_DISABLE = 2,
110*4882a593Smuzhiyun };
111*4882a593Smuzhiyun 
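/*
 * Software copy of the fields packed into the hardware pm_control register
 * by write_pm_cntrl().  count_mode is filled in by set_count_mode();
 * trace_mode, spu_addr_trace and trace_buf_ovflw are only set to non-zero
 * values for SPU event profiling (see cell_reg_setup_spu_events()).
 */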
112*4882a593Smuzhiyun struct pm_cntrl {
113*4882a593Smuzhiyun 	u16 enable;
114*4882a593Smuzhiyun 	u16 stop_at_max;
115*4882a593Smuzhiyun 	u16 trace_mode;
116*4882a593Smuzhiyun 	u16 freeze;
117*4882a593Smuzhiyun 	u16 count_mode;
118*4882a593Smuzhiyun 	u16 spu_addr_trace;
119*4882a593Smuzhiyun 	u8  trace_buf_ovflw;
120*4882a593Smuzhiyun };
121*4882a593Smuzhiyun 
122*4882a593Smuzhiyun static struct {
123*4882a593Smuzhiyun 	u32 group_control;
124*4882a593Smuzhiyun 	u32 debug_bus_control;
125*4882a593Smuzhiyun 	struct pm_cntrl pm_cntrl;
126*4882a593Smuzhiyun 	u32 pm07_cntrl[NR_PHYS_CTRS];
127*4882a593Smuzhiyun } pm_regs;
128*4882a593Smuzhiyun 
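/*
 * Field layout of the per-event unit_mask decoded by the macros below:
 * bits 15:12 sub unit, bits 9:8 bus type, bits 7:4 bus word (bitmask),
 * bit 2 input control, bit 1 polarity, bit 0 count cycles.
 */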
129*4882a593Smuzhiyun #define GET_SUB_UNIT(x) ((x & 0x0000f000) >> 12)
130*4882a593Smuzhiyun #define GET_BUS_WORD(x) ((x & 0x000000f0) >> 4)
131*4882a593Smuzhiyun #define GET_BUS_TYPE(x) ((x & 0x00000300) >> 8)
132*4882a593Smuzhiyun #define GET_POLARITY(x) ((x & 0x00000002) >> 1)
133*4882a593Smuzhiyun #define GET_COUNT_CYCLES(x) (x & 0x00000001)
134*4882a593Smuzhiyun #define GET_INPUT_CONTROL(x) ((x & 0x00000004) >> 2)
135*4882a593Smuzhiyun 
136*4882a593Smuzhiyun static DEFINE_PER_CPU(unsigned long[NR_PHYS_CTRS], pmc_values);
137*4882a593Smuzhiyun static unsigned long spu_pm_cnt[MAX_NUMNODES * NUM_SPUS_PER_NODE];
138*4882a593Smuzhiyun static struct pmc_cntrl_data pmc_cntrl[NUM_THREADS][NR_PHYS_CTRS];
139*4882a593Smuzhiyun 
140*4882a593Smuzhiyun /*
141*4882a593Smuzhiyun  * The CELL profiling code makes rtas calls to setup the debug bus to
142*4882a593Smuzhiyun  * route the performance signals.  Additionally, SPU profiling requires
143*4882a593Smuzhiyun  * a second rtas call to setup the hardware to capture the SPU PCs.
144*4882a593Smuzhiyun  * The EIO error value is returned if the token lookups or the rtas
145*4882a593Smuzhiyun  * call fail.  The EIO error number is the best choice of the existing
146*4882a593Smuzhiyun  * error numbers.  The probability of rtas related error is very low.  But
147*4882a593Smuzhiyun  * by returning EIO and printing additional information to dmesg, the user
148*4882a593Smuzhiyun  * will know that OProfile did not start and dmesg will tell them why.
149*4882a593Smuzhiyun  * OProfile does not support returning errors on Stop.	Not a huge issue
150*4882a593Smuzhiyun  * since failure to reset the debug bus or stop the SPU PC collection is
151*4882a593Smuzhiyun  * not a fatal issue.  Chances are if the Stop failed, Start doesn't work
152*4882a593Smuzhiyun  * either.
153*4882a593Smuzhiyun  */
154*4882a593Smuzhiyun 
155*4882a593Smuzhiyun /*
156*4882a593Smuzhiyun  * Interpretation of hdw_thread:
157*4882a593Smuzhiyun  * 0 - even virtual cpus 0, 2, 4,...
158*4882a593Smuzhiyun  * 1 - odd virtual cpus 1, 3, 5, ...
159*4882a593Smuzhiyun  *
160*4882a593Smuzhiyun  * FIXME: this is strictly wrong, we need to clean this up in a number
161*4882a593Smuzhiyun  * of places. It works for now. -arnd
162*4882a593Smuzhiyun  */
163*4882a593Smuzhiyun static u32 hdw_thread;
164*4882a593Smuzhiyun 
165*4882a593Smuzhiyun static u32 virt_cntr_inter_mask;
166*4882a593Smuzhiyun static struct timer_list timer_virt_cntr;
167*4882a593Smuzhiyun static struct timer_list timer_spu_event_swap;
168*4882a593Smuzhiyun 
169*4882a593Smuzhiyun /*
170*4882a593Smuzhiyun  * pm_signal needs to be global since it is initialized in
171*4882a593Smuzhiyun  * cell_reg_setup at the time when the necessary information
172*4882a593Smuzhiyun  * is available.
173*4882a593Smuzhiyun  */
174*4882a593Smuzhiyun static struct pm_signal pm_signal[NR_PHYS_CTRS];
175*4882a593Smuzhiyun static int pm_rtas_token;    /* token for debug bus setup call */
176*4882a593Smuzhiyun static int spu_rtas_token;   /* token for SPU cycle profiling */
177*4882a593Smuzhiyun 
178*4882a593Smuzhiyun static u32 reset_value[NR_PHYS_CTRS];
179*4882a593Smuzhiyun static int num_counters;
180*4882a593Smuzhiyun static int oprofile_running;
181*4882a593Smuzhiyun static DEFINE_SPINLOCK(cntr_lock);
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun static u32 ctr_enabled;
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun static unsigned char input_bus[NUM_INPUT_BUS_WORDS];
186*4882a593Smuzhiyun 
187*4882a593Smuzhiyun /*
188*4882a593Smuzhiyun  * Firmware interface functions
189*4882a593Smuzhiyun  */
190*4882a593Smuzhiyun static int
191*4882a593Smuzhiyun rtas_ibm_cbe_perftools(int subfunc, int passthru,
192*4882a593Smuzhiyun 		       void *address, unsigned long length)
193*4882a593Smuzhiyun {
194*4882a593Smuzhiyun 	u64 paddr = __pa(address);
195*4882a593Smuzhiyun 
196*4882a593Smuzhiyun 	return rtas_call(pm_rtas_token, 5, 1, NULL, subfunc,
197*4882a593Smuzhiyun 			 passthru, paddr >> 32, paddr & 0xffffffff, length);
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun 
200*4882a593Smuzhiyun static void pm_rtas_reset_signals(u32 node)
201*4882a593Smuzhiyun {
202*4882a593Smuzhiyun 	int ret;
203*4882a593Smuzhiyun 	struct pm_signal pm_signal_local;
204*4882a593Smuzhiyun 
205*4882a593Smuzhiyun 	/*
206*4882a593Smuzhiyun 	 * The debug bus is being set to the passthru disable state.
207*4882a593Smuzhiyun 	 * However, the FW still expects at least one legal signal routing
208*4882a593Smuzhiyun 	 * entry or it will return an error on the arguments.	If we don't
209*4882a593Smuzhiyun 	 * supply a valid entry, we must ignore all return values.  Ignoring
210*4882a593Smuzhiyun 	 * all return values means we might miss an error we should be
211*4882a593Smuzhiyun 	 * concerned about.
212*4882a593Smuzhiyun 	 */
213*4882a593Smuzhiyun 
214*4882a593Smuzhiyun 	/*  fw expects physical cpu #. */
215*4882a593Smuzhiyun 	pm_signal_local.cpu = node;
216*4882a593Smuzhiyun 	pm_signal_local.signal_group = 21;
217*4882a593Smuzhiyun 	pm_signal_local.bus_word = 1;
218*4882a593Smuzhiyun 	pm_signal_local.sub_unit = 0;
219*4882a593Smuzhiyun 	pm_signal_local.bit = 0;
220*4882a593Smuzhiyun 
221*4882a593Smuzhiyun 	ret = rtas_ibm_cbe_perftools(SUBFUNC_RESET, PASSTHRU_DISABLE,
222*4882a593Smuzhiyun 				     &pm_signal_local,
223*4882a593Smuzhiyun 				     sizeof(struct pm_signal));
224*4882a593Smuzhiyun 
225*4882a593Smuzhiyun 	if (unlikely(ret))
226*4882a593Smuzhiyun 		/*
227*4882a593Smuzhiyun 		 * Not a fatal error. The OProfile stop
228*4882a593Smuzhiyun 		 * functions do not support returning an error for
229*4882a593Smuzhiyun 		 * failure to stop OProfile.
230*4882a593Smuzhiyun 		 */
231*4882a593Smuzhiyun 		printk(KERN_WARNING "%s: rtas returned: %d\n",
232*4882a593Smuzhiyun 		       __func__, ret);
233*4882a593Smuzhiyun }
234*4882a593Smuzhiyun 
235*4882a593Smuzhiyun static int pm_rtas_activate_signals(u32 node, u32 count)
236*4882a593Smuzhiyun {
237*4882a593Smuzhiyun 	int ret;
238*4882a593Smuzhiyun 	int i, j;
239*4882a593Smuzhiyun 	struct pm_signal pm_signal_local[NR_PHYS_CTRS];
240*4882a593Smuzhiyun 
241*4882a593Smuzhiyun 	/*
242*4882a593Smuzhiyun 	 * There is no debug setup required for the cycles event.
243*4882a593Smuzhiyun 	 * Note that only events in the same group can be used.
244*4882a593Smuzhiyun 	 * Otherwise, there will be conflicts in correctly routing
245*4882a593Smuzhiyun 	 * the signals on the debug bus.  It is the responsibility
246*4882a593Smuzhiyun 	 * of the OProfile user tool to check the events are in
247*4882a593Smuzhiyun 	 * the same group.
248*4882a593Smuzhiyun 	 */
249*4882a593Smuzhiyun 	i = 0;
250*4882a593Smuzhiyun 	for (j = 0; j < count; j++) {
251*4882a593Smuzhiyun 		if (pm_signal[j].signal_group != PPU_CYCLES_GRP_NUM) {
252*4882a593Smuzhiyun 
253*4882a593Smuzhiyun 			/* fw expects physical cpu # */
254*4882a593Smuzhiyun 			pm_signal_local[i].cpu = node;
255*4882a593Smuzhiyun 			pm_signal_local[i].signal_group
256*4882a593Smuzhiyun 				= pm_signal[j].signal_group;
257*4882a593Smuzhiyun 			pm_signal_local[i].bus_word = pm_signal[j].bus_word;
258*4882a593Smuzhiyun 			pm_signal_local[i].sub_unit = pm_signal[j].sub_unit;
259*4882a593Smuzhiyun 			pm_signal_local[i].bit = pm_signal[j].bit;
260*4882a593Smuzhiyun 			i++;
261*4882a593Smuzhiyun 		}
262*4882a593Smuzhiyun 	}
263*4882a593Smuzhiyun 
264*4882a593Smuzhiyun 	if (i != 0) {
265*4882a593Smuzhiyun 		ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE, PASSTHRU_ENABLE,
266*4882a593Smuzhiyun 					     pm_signal_local,
267*4882a593Smuzhiyun 					     i * sizeof(struct pm_signal));
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun 		if (unlikely(ret)) {
270*4882a593Smuzhiyun 			printk(KERN_WARNING "%s: rtas returned: %d\n",
271*4882a593Smuzhiyun 			       __func__, ret);
272*4882a593Smuzhiyun 			return -EIO;
273*4882a593Smuzhiyun 		}
274*4882a593Smuzhiyun 	}
275*4882a593Smuzhiyun 
276*4882a593Smuzhiyun 	return 0;
277*4882a593Smuzhiyun }
278*4882a593Smuzhiyun 
279*4882a593Smuzhiyun /*
280*4882a593Smuzhiyun  * PM Signal functions
281*4882a593Smuzhiyun  */
282*4882a593Smuzhiyun static void set_pm_event(u32 ctr, int event, u32 unit_mask)
283*4882a593Smuzhiyun {
284*4882a593Smuzhiyun 	struct pm_signal *p;
285*4882a593Smuzhiyun 	u32 signal_bit;
286*4882a593Smuzhiyun 	u32 bus_word, bus_type, count_cycles, polarity, input_control;
287*4882a593Smuzhiyun 	int j, i;
288*4882a593Smuzhiyun 
289*4882a593Smuzhiyun 	if (event == PPU_CYCLES_EVENT_NUM) {
290*4882a593Smuzhiyun 		/* Special Event: Count all cpu cycles */
291*4882a593Smuzhiyun 		pm_regs.pm07_cntrl[ctr] = CBE_COUNT_ALL_CYCLES;
292*4882a593Smuzhiyun 		p = &(pm_signal[ctr]);
293*4882a593Smuzhiyun 		p->signal_group = PPU_CYCLES_GRP_NUM;
294*4882a593Smuzhiyun 		p->bus_word = 1;
295*4882a593Smuzhiyun 		p->sub_unit = 0;
296*4882a593Smuzhiyun 		p->bit = 0;
297*4882a593Smuzhiyun 		goto out;
298*4882a593Smuzhiyun 	} else {
299*4882a593Smuzhiyun 		pm_regs.pm07_cntrl[ctr] = 0;
300*4882a593Smuzhiyun 	}
301*4882a593Smuzhiyun 
302*4882a593Smuzhiyun 	bus_word = GET_BUS_WORD(unit_mask);
303*4882a593Smuzhiyun 	bus_type = GET_BUS_TYPE(unit_mask);
304*4882a593Smuzhiyun 	count_cycles = GET_COUNT_CYCLES(unit_mask);
305*4882a593Smuzhiyun 	polarity = GET_POLARITY(unit_mask);
306*4882a593Smuzhiyun 	input_control = GET_INPUT_CONTROL(unit_mask);
307*4882a593Smuzhiyun 	signal_bit = (event % 100);
308*4882a593Smuzhiyun 
309*4882a593Smuzhiyun 	p = &(pm_signal[ctr]);
310*4882a593Smuzhiyun 
311*4882a593Smuzhiyun 	p->signal_group = event / 100;
312*4882a593Smuzhiyun 	p->bus_word = bus_word;
313*4882a593Smuzhiyun 	p->sub_unit = GET_SUB_UNIT(unit_mask);
314*4882a593Smuzhiyun 
315*4882a593Smuzhiyun 	pm_regs.pm07_cntrl[ctr] = 0;
316*4882a593Smuzhiyun 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_COUNT_CYCLES(count_cycles);
317*4882a593Smuzhiyun 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_POLARITY(polarity);
318*4882a593Smuzhiyun 	pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_CONTROL(input_control);
319*4882a593Smuzhiyun 
320*4882a593Smuzhiyun 	/*
321*4882a593Smuzhiyun 	 * Some of the islands signal selection is based on 64 bit words.
322*4882a593Smuzhiyun 	 * The debug bus words are 32 bits, the input words to the performance
323*4882a593Smuzhiyun 	 * counters are defined as 32 bits.  Need to convert the 64 bit island
324*4882a593Smuzhiyun 	 * specification to the appropriate 32 input bit and bus word for the
325*4882a593Smuzhiyun 	 * performance counter event selection.	 See the CELL Performance
326*4882a593Smuzhiyun 	 * monitoring signals manual and the Perf cntr hardware descriptions
327*4882a593Smuzhiyun 	 * for the details.
328*4882a593Smuzhiyun 	 */
329*4882a593Smuzhiyun 	if (input_control == 0) {
330*4882a593Smuzhiyun 		if (signal_bit > 31) {
331*4882a593Smuzhiyun 			signal_bit -= 32;
332*4882a593Smuzhiyun 			if (bus_word == 0x3)
333*4882a593Smuzhiyun 				bus_word = 0x2;
334*4882a593Smuzhiyun 			else if (bus_word == 0xc)
335*4882a593Smuzhiyun 				bus_word = 0x8;
336*4882a593Smuzhiyun 		}
337*4882a593Smuzhiyun 
338*4882a593Smuzhiyun 		if ((bus_type == 0) && p->signal_group >= 60)
339*4882a593Smuzhiyun 			bus_type = 2;
340*4882a593Smuzhiyun 		if ((bus_type == 1) && p->signal_group >= 50)
341*4882a593Smuzhiyun 			bus_type = 0;
342*4882a593Smuzhiyun 
343*4882a593Smuzhiyun 		pm_regs.pm07_cntrl[ctr] |= PM07_CTR_INPUT_MUX(signal_bit);
344*4882a593Smuzhiyun 	} else {
345*4882a593Smuzhiyun 		pm_regs.pm07_cntrl[ctr] = 0;
346*4882a593Smuzhiyun 		p->bit = signal_bit;
347*4882a593Smuzhiyun 	}
348*4882a593Smuzhiyun 
349*4882a593Smuzhiyun 	for (i = 0; i < NUM_DEBUG_BUS_WORDS; i++) {
350*4882a593Smuzhiyun 		if (bus_word & (1 << i)) {
351*4882a593Smuzhiyun 			pm_regs.debug_bus_control |=
352*4882a593Smuzhiyun 				(bus_type << (30 - (2 * i)));
353*4882a593Smuzhiyun 
354*4882a593Smuzhiyun 			for (j = 0; j < NUM_INPUT_BUS_WORDS; j++) {
355*4882a593Smuzhiyun 				if (input_bus[j] == 0xff) {
356*4882a593Smuzhiyun 					input_bus[j] = i;
357*4882a593Smuzhiyun 					pm_regs.group_control |=
358*4882a593Smuzhiyun 						(i << (30 - (2 * j)));
359*4882a593Smuzhiyun 
360*4882a593Smuzhiyun 					break;
361*4882a593Smuzhiyun 				}
362*4882a593Smuzhiyun 			}
363*4882a593Smuzhiyun 		}
364*4882a593Smuzhiyun 	}
365*4882a593Smuzhiyun out:
366*4882a593Smuzhiyun 	;
367*4882a593Smuzhiyun }
368*4882a593Smuzhiyun 
369*4882a593Smuzhiyun static void write_pm_cntrl(int cpu)
370*4882a593Smuzhiyun {
371*4882a593Smuzhiyun 	/*
372*4882a593Smuzhiyun 	 * Oprofile will use 32 bit counters, set bits 7:10 to 0
373*4882a593Smuzhiyun 	 * pmregs.pm_cntrl is a global
374*4882a593Smuzhiyun 	 */
375*4882a593Smuzhiyun 
376*4882a593Smuzhiyun 	u32 val = 0;
377*4882a593Smuzhiyun 	if (pm_regs.pm_cntrl.enable == 1)
378*4882a593Smuzhiyun 		val |= CBE_PM_ENABLE_PERF_MON;
379*4882a593Smuzhiyun 
380*4882a593Smuzhiyun 	if (pm_regs.pm_cntrl.stop_at_max == 1)
381*4882a593Smuzhiyun 		val |= CBE_PM_STOP_AT_MAX;
382*4882a593Smuzhiyun 
383*4882a593Smuzhiyun 	if (pm_regs.pm_cntrl.trace_mode != 0)
384*4882a593Smuzhiyun 		val |= CBE_PM_TRACE_MODE_SET(pm_regs.pm_cntrl.trace_mode);
385*4882a593Smuzhiyun 
386*4882a593Smuzhiyun 	if (pm_regs.pm_cntrl.trace_buf_ovflw == 1)
387*4882a593Smuzhiyun 		val |= CBE_PM_TRACE_BUF_OVFLW(pm_regs.pm_cntrl.trace_buf_ovflw);
388*4882a593Smuzhiyun 	if (pm_regs.pm_cntrl.freeze == 1)
389*4882a593Smuzhiyun 		val |= CBE_PM_FREEZE_ALL_CTRS;
390*4882a593Smuzhiyun 
391*4882a593Smuzhiyun 	val |= CBE_PM_SPU_ADDR_TRACE_SET(pm_regs.pm_cntrl.spu_addr_trace);
392*4882a593Smuzhiyun 
393*4882a593Smuzhiyun 	/*
394*4882a593Smuzhiyun 	 * Routine set_count_mode must be called previously to set
395*4882a593Smuzhiyun 	 * the count mode based on the user selection of user and kernel.
396*4882a593Smuzhiyun 	 */
397*4882a593Smuzhiyun 	val |= CBE_PM_COUNT_MODE_SET(pm_regs.pm_cntrl.count_mode);
398*4882a593Smuzhiyun 	cbe_write_pm(cpu, pm_control, val);
399*4882a593Smuzhiyun }
400*4882a593Smuzhiyun 
401*4882a593Smuzhiyun static inline void
402*4882a593Smuzhiyun set_count_mode(u32 kernel, u32 user)
403*4882a593Smuzhiyun {
404*4882a593Smuzhiyun 	/*
405*4882a593Smuzhiyun 	 * The user must specify user and kernel if they want them. If
406*4882a593Smuzhiyun 	 *  neither is specified, OProfile will count in hypervisor mode.
407*4882a593Smuzhiyun 	 *  pm_regs.pm_cntrl is a global
408*4882a593Smuzhiyun 	 */
409*4882a593Smuzhiyun 	if (kernel) {
410*4882a593Smuzhiyun 		if (user)
411*4882a593Smuzhiyun 			pm_regs.pm_cntrl.count_mode = CBE_COUNT_ALL_MODES;
412*4882a593Smuzhiyun 		else
413*4882a593Smuzhiyun 			pm_regs.pm_cntrl.count_mode =
414*4882a593Smuzhiyun 				CBE_COUNT_SUPERVISOR_MODE;
415*4882a593Smuzhiyun 	} else {
416*4882a593Smuzhiyun 		if (user)
417*4882a593Smuzhiyun 			pm_regs.pm_cntrl.count_mode = CBE_COUNT_PROBLEM_MODE;
418*4882a593Smuzhiyun 		else
419*4882a593Smuzhiyun 			pm_regs.pm_cntrl.count_mode =
420*4882a593Smuzhiyun 				CBE_COUNT_HYPERVISOR_MODE;
421*4882a593Smuzhiyun 	}
422*4882a593Smuzhiyun }
423*4882a593Smuzhiyun 
424*4882a593Smuzhiyun static inline void enable_ctr(u32 cpu, u32 ctr, u32 *pm07_cntrl)
425*4882a593Smuzhiyun {
426*4882a593Smuzhiyun 
427*4882a593Smuzhiyun 	pm07_cntrl[ctr] |= CBE_PM_CTR_ENABLE;
428*4882a593Smuzhiyun 	cbe_write_pm07_control(cpu, ctr, pm07_cntrl[ctr]);
429*4882a593Smuzhiyun }
430*4882a593Smuzhiyun 
431*4882a593Smuzhiyun /*
432*4882a593Smuzhiyun  * Oprofile is expected to collect data on all CPUs simultaneously.
433*4882a593Smuzhiyun  * However, there is one set of performance counters per node.	There are
434*4882a593Smuzhiyun  * two hardware threads or virtual CPUs on each node.  Hence, OProfile must
435*4882a593Smuzhiyun  * multiplex in time the performance counter collection on the two virtual
436*4882a593Smuzhiyun  * CPUs.  The multiplexing of the performance counters is done by this
437*4882a593Smuzhiyun  * virtual counter routine.
438*4882a593Smuzhiyun  *
439*4882a593Smuzhiyun  * The pmc_values used below is defined as 'per-cpu' but its use is
440*4882a593Smuzhiyun  * more akin to 'per-node'.  We need to store two sets of counter
441*4882a593Smuzhiyun  * values per node -- one for the previous run and one for the next.
442*4882a593Smuzhiyun  * The per-cpu[NR_PHYS_CTRS] gives us the storage we need.  Each odd/even
443*4882a593Smuzhiyun  * pair of per-cpu arrays is used for storing the previous and next
444*4882a593Smuzhiyun  * pmc values for a given node.
445*4882a593Smuzhiyun  * NOTE: We use the per-cpu variable to improve cache performance.
446*4882a593Smuzhiyun  *
447*4882a593Smuzhiyun  * This routine will alternate loading the virtual counters for
448*4882a593Smuzhiyun  * virtual CPUs
449*4882a593Smuzhiyun  */
450*4882a593Smuzhiyun static void cell_virtual_cntr(struct timer_list *unused)
451*4882a593Smuzhiyun {
452*4882a593Smuzhiyun 	int i, prev_hdw_thread, next_hdw_thread;
453*4882a593Smuzhiyun 	u32 cpu;
454*4882a593Smuzhiyun 	unsigned long flags;
455*4882a593Smuzhiyun 
456*4882a593Smuzhiyun 	/*
457*4882a593Smuzhiyun 	 * Make sure that the interrupt_handler and the virt counter are
458*4882a593Smuzhiyun 	 * not both playing with the counters on the same node.
459*4882a593Smuzhiyun 	 */
460*4882a593Smuzhiyun 
461*4882a593Smuzhiyun 	spin_lock_irqsave(&cntr_lock, flags);
462*4882a593Smuzhiyun 
463*4882a593Smuzhiyun 	prev_hdw_thread = hdw_thread;
464*4882a593Smuzhiyun 
465*4882a593Smuzhiyun 	/* switch the cpu handling the interrupts */
466*4882a593Smuzhiyun 	hdw_thread = 1 ^ hdw_thread;
467*4882a593Smuzhiyun 	next_hdw_thread = hdw_thread;
468*4882a593Smuzhiyun 
469*4882a593Smuzhiyun 	pm_regs.group_control = 0;
470*4882a593Smuzhiyun 	pm_regs.debug_bus_control = 0;
471*4882a593Smuzhiyun 
472*4882a593Smuzhiyun 	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
473*4882a593Smuzhiyun 		input_bus[i] = 0xff;
474*4882a593Smuzhiyun 
475*4882a593Smuzhiyun 	/*
476*4882a593Smuzhiyun 	 * There are some per thread events.  Must do the
477*4882a593Smuzhiyun 	 * set event for the thread that is being started.
478*4882a593Smuzhiyun 	 */
479*4882a593Smuzhiyun 	for (i = 0; i < num_counters; i++)
480*4882a593Smuzhiyun 		set_pm_event(i,
481*4882a593Smuzhiyun 			pmc_cntrl[next_hdw_thread][i].evnts,
482*4882a593Smuzhiyun 			pmc_cntrl[next_hdw_thread][i].masks);
483*4882a593Smuzhiyun 
484*4882a593Smuzhiyun 	/*
485*4882a593Smuzhiyun 	 * The following is done only once per each node, but
486*4882a593Smuzhiyun 	 * we need cpu #, not node #, to pass to the cbe_xxx functions.
487*4882a593Smuzhiyun 	 */
488*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
489*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
490*4882a593Smuzhiyun 			continue;
491*4882a593Smuzhiyun 
492*4882a593Smuzhiyun 		/*
493*4882a593Smuzhiyun 		 * stop counters, save counter values, restore counts
494*4882a593Smuzhiyun 		 * for previous thread
495*4882a593Smuzhiyun 		 */
496*4882a593Smuzhiyun 		cbe_disable_pm(cpu);
497*4882a593Smuzhiyun 		cbe_disable_pm_interrupts(cpu);
498*4882a593Smuzhiyun 		for (i = 0; i < num_counters; i++) {
499*4882a593Smuzhiyun 			per_cpu(pmc_values, cpu + prev_hdw_thread)[i]
500*4882a593Smuzhiyun 				= cbe_read_ctr(cpu, i);
501*4882a593Smuzhiyun 
502*4882a593Smuzhiyun 			if (per_cpu(pmc_values, cpu + next_hdw_thread)[i]
503*4882a593Smuzhiyun 			    == 0xFFFFFFFF)
504*4882a593Smuzhiyun 				/* If the cntr value is 0xffffffff, we must
505*4882a593Smuzhiyun 				 * reset that to 0xfffffff0 when the current
506*4882a593Smuzhiyun 				 * thread is restarted.	 This will generate a
507*4882a593Smuzhiyun 				 * new interrupt and make sure that we never
508*4882a593Smuzhiyun 				 * restore the counters to the max value.  If
509*4882a593Smuzhiyun 				 * the counters were restored to the max value,
510*4882a593Smuzhiyun 				 * they do not increment and no interrupts are
511*4882a593Smuzhiyun 				 * generated.  Hence no more samples will be
512*4882a593Smuzhiyun 				 * collected on that cpu.
513*4882a593Smuzhiyun 				 */
514*4882a593Smuzhiyun 				cbe_write_ctr(cpu, i, 0xFFFFFFF0);
515*4882a593Smuzhiyun 			else
516*4882a593Smuzhiyun 				cbe_write_ctr(cpu, i,
517*4882a593Smuzhiyun 					      per_cpu(pmc_values,
518*4882a593Smuzhiyun 						      cpu +
519*4882a593Smuzhiyun 						      next_hdw_thread)[i]);
520*4882a593Smuzhiyun 		}
521*4882a593Smuzhiyun 
522*4882a593Smuzhiyun 		/*
523*4882a593Smuzhiyun 		 * Switch to the other thread. Change the interrupt
524*4882a593Smuzhiyun 		 * and control regs to be scheduled on the CPU
525*4882a593Smuzhiyun 		 * corresponding to the thread to execute.
526*4882a593Smuzhiyun 		 */
527*4882a593Smuzhiyun 		for (i = 0; i < num_counters; i++) {
528*4882a593Smuzhiyun 			if (pmc_cntrl[next_hdw_thread][i].enabled) {
529*4882a593Smuzhiyun 				/*
530*4882a593Smuzhiyun 				 * There are some per thread events.
531*4882a593Smuzhiyun 				 * Must do the set event, enable_cntr
532*4882a593Smuzhiyun 				 * for each cpu.
533*4882a593Smuzhiyun 				 */
534*4882a593Smuzhiyun 				enable_ctr(cpu, i,
535*4882a593Smuzhiyun 					   pm_regs.pm07_cntrl);
536*4882a593Smuzhiyun 			} else {
537*4882a593Smuzhiyun 				cbe_write_pm07_control(cpu, i, 0);
538*4882a593Smuzhiyun 			}
539*4882a593Smuzhiyun 		}
540*4882a593Smuzhiyun 
541*4882a593Smuzhiyun 		/* Enable interrupts on the CPU thread that is starting */
542*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, next_hdw_thread,
543*4882a593Smuzhiyun 					 virt_cntr_inter_mask);
544*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
545*4882a593Smuzhiyun 	}
546*4882a593Smuzhiyun 
547*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cntr_lock, flags);
548*4882a593Smuzhiyun 
549*4882a593Smuzhiyun 	mod_timer(&timer_virt_cntr, jiffies + HZ / 10);
550*4882a593Smuzhiyun }
551*4882a593Smuzhiyun 
552*4882a593Smuzhiyun static void start_virt_cntrs(void)
553*4882a593Smuzhiyun {
554*4882a593Smuzhiyun 	timer_setup(&timer_virt_cntr, cell_virtual_cntr, 0);
555*4882a593Smuzhiyun 	timer_virt_cntr.expires = jiffies + HZ / 10;
556*4882a593Smuzhiyun 	add_timer(&timer_virt_cntr);
557*4882a593Smuzhiyun }
558*4882a593Smuzhiyun 
559*4882a593Smuzhiyun static int cell_reg_setup_spu_cycles(struct op_counter_config *ctr,
560*4882a593Smuzhiyun 			struct op_system_config *sys, int num_ctrs)
561*4882a593Smuzhiyun {
562*4882a593Smuzhiyun 	spu_cycle_reset = ctr[0].count;
563*4882a593Smuzhiyun 
564*4882a593Smuzhiyun 	/*
565*4882a593Smuzhiyun 	 * Each node will need to make the rtas call to start
566*4882a593Smuzhiyun 	 * and stop SPU profiling.  Get the token once and store it.
567*4882a593Smuzhiyun 	 */
568*4882a593Smuzhiyun 	spu_rtas_token = rtas_token("ibm,cbe-spu-perftools");
569*4882a593Smuzhiyun 
570*4882a593Smuzhiyun 	if (unlikely(spu_rtas_token == RTAS_UNKNOWN_SERVICE)) {
571*4882a593Smuzhiyun 		printk(KERN_ERR
572*4882a593Smuzhiyun 		       "%s: rtas token ibm,cbe-spu-perftools unknown\n",
573*4882a593Smuzhiyun 		       __func__);
574*4882a593Smuzhiyun 		return -EIO;
575*4882a593Smuzhiyun 	}
576*4882a593Smuzhiyun 	return 0;
577*4882a593Smuzhiyun }
578*4882a593Smuzhiyun 
579*4882a593Smuzhiyun /* Unfortunately, the hardware will only support event profiling
580*4882a593Smuzhiyun  * on one SPU per node at a time.  Therefore, we must time slice
581*4882a593Smuzhiyun  * the profiling across all SPUs in the node.  Note, we do this
582*4882a593Smuzhiyun  * in parallel for each node.  The following routine is called
583*4882a593Smuzhiyun  * periodically based on a kernel timer to switch which SPU is
584*4882a593Smuzhiyun  * being monitored in a round-robin fashion.
585*4882a593Smuzhiyun  */
586*4882a593Smuzhiyun static void spu_evnt_swap(struct timer_list *unused)
587*4882a593Smuzhiyun {
588*4882a593Smuzhiyun 	int node;
589*4882a593Smuzhiyun 	int cur_phys_spu, nxt_phys_spu, cur_spu_evnt_phys_spu_indx;
590*4882a593Smuzhiyun 	unsigned long flags;
591*4882a593Smuzhiyun 	int cpu;
592*4882a593Smuzhiyun 	int ret;
593*4882a593Smuzhiyun 	u32 interrupt_mask;
594*4882a593Smuzhiyun 
595*4882a593Smuzhiyun 
596*4882a593Smuzhiyun 	/* enable interrupts on cntr 0 */
597*4882a593Smuzhiyun 	interrupt_mask = CBE_PM_CTR_OVERFLOW_INTR(0);
598*4882a593Smuzhiyun 
599*4882a593Smuzhiyun 	hdw_thread = 0;
600*4882a593Smuzhiyun 
601*4882a593Smuzhiyun 	/* Make sure spu event interrupt handler and spu event swap
602*4882a593Smuzhiyun 	 * don't access the counters simultaneously.
603*4882a593Smuzhiyun 	 */
604*4882a593Smuzhiyun 	spin_lock_irqsave(&cntr_lock, flags);
605*4882a593Smuzhiyun 
606*4882a593Smuzhiyun 	cur_spu_evnt_phys_spu_indx = spu_evnt_phys_spu_indx;
607*4882a593Smuzhiyun 
608*4882a593Smuzhiyun 	if (++(spu_evnt_phys_spu_indx) == NUM_SPUS_PER_NODE)
609*4882a593Smuzhiyun 		spu_evnt_phys_spu_indx = 0;
610*4882a593Smuzhiyun 
611*4882a593Smuzhiyun 	pm_signal[0].sub_unit = spu_evnt_phys_spu_indx;
612*4882a593Smuzhiyun 	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
613*4882a593Smuzhiyun 	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
614*4882a593Smuzhiyun 
615*4882a593Smuzhiyun 	/* switch the SPU being profiled on each node */
616*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
617*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
618*4882a593Smuzhiyun 			continue;
619*4882a593Smuzhiyun 
620*4882a593Smuzhiyun 		node = cbe_cpu_to_node(cpu);
621*4882a593Smuzhiyun 		cur_phys_spu = (node * NUM_SPUS_PER_NODE)
622*4882a593Smuzhiyun 			+ cur_spu_evnt_phys_spu_indx;
623*4882a593Smuzhiyun 		nxt_phys_spu = (node * NUM_SPUS_PER_NODE)
624*4882a593Smuzhiyun 			+ spu_evnt_phys_spu_indx;
625*4882a593Smuzhiyun 
626*4882a593Smuzhiyun 		/*
627*4882a593Smuzhiyun 		 * stop counters, save counter values, restore counts
628*4882a593Smuzhiyun 		 * for previous physical SPU
629*4882a593Smuzhiyun 		 */
630*4882a593Smuzhiyun 		cbe_disable_pm(cpu);
631*4882a593Smuzhiyun 		cbe_disable_pm_interrupts(cpu);
632*4882a593Smuzhiyun 
633*4882a593Smuzhiyun 		spu_pm_cnt[cur_phys_spu]
634*4882a593Smuzhiyun 			= cbe_read_ctr(cpu, 0);
635*4882a593Smuzhiyun 
636*4882a593Smuzhiyun 		/* restore previous count for the next spu to sample */
637*4882a593Smuzhiyun 		/* NOTE, hardware issue, counter will not start if the
638*4882a593Smuzhiyun 		 * counter value is at max (0xFFFFFFFF).
639*4882a593Smuzhiyun 		 */
640*4882a593Smuzhiyun 		if (spu_pm_cnt[nxt_phys_spu] >= 0xFFFFFFFF)
641*4882a593Smuzhiyun 			cbe_write_ctr(cpu, 0, 0xFFFFFFF0);
642*4882a593Smuzhiyun 		 else
643*4882a593Smuzhiyun 			 cbe_write_ctr(cpu, 0, spu_pm_cnt[nxt_phys_spu]);
644*4882a593Smuzhiyun 
645*4882a593Smuzhiyun 		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
646*4882a593Smuzhiyun 
647*4882a593Smuzhiyun 		/* setup the debug bus to measure the one event and
648*4882a593Smuzhiyun 		 * the two events to route the next SPU's PC on
649*4882a593Smuzhiyun 		 * the debug bus
650*4882a593Smuzhiyun 		 */
651*4882a593Smuzhiyun 		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu), 3);
652*4882a593Smuzhiyun 		if (ret)
653*4882a593Smuzhiyun 			printk(KERN_ERR "%s: pm_rtas_activate_signals failed, "
654*4882a593Smuzhiyun 			       "SPU event swap\n", __func__);
655*4882a593Smuzhiyun 
656*4882a593Smuzhiyun 		/* clear the trace buffer, don't want to take PC for
657*4882a593Smuzhiyun 		 * previous SPU*/
658*4882a593Smuzhiyun 		cbe_write_pm(cpu, trace_address, 0);
659*4882a593Smuzhiyun 
660*4882a593Smuzhiyun 		enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
661*4882a593Smuzhiyun 
662*4882a593Smuzhiyun 		/* Enable interrupts on the CPU thread that is starting */
663*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, hdw_thread,
664*4882a593Smuzhiyun 					 interrupt_mask);
665*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
666*4882a593Smuzhiyun 	}
667*4882a593Smuzhiyun 
668*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cntr_lock, flags);
669*4882a593Smuzhiyun 
670*4882a593Smuzhiyun 	/* swap approximately every 1/25th of a second (HZ / 25 jiffies) */
671*4882a593Smuzhiyun 	mod_timer(&timer_spu_event_swap, jiffies + HZ / 25);
672*4882a593Smuzhiyun }
673*4882a593Smuzhiyun 
674*4882a593Smuzhiyun static void start_spu_event_swap(void)
675*4882a593Smuzhiyun {
676*4882a593Smuzhiyun 	timer_setup(&timer_spu_event_swap, spu_evnt_swap, 0);
677*4882a593Smuzhiyun 	timer_spu_event_swap.expires = jiffies + HZ / 25;
678*4882a593Smuzhiyun 	add_timer(&timer_spu_event_swap);
679*4882a593Smuzhiyun }
680*4882a593Smuzhiyun 
681*4882a593Smuzhiyun static int cell_reg_setup_spu_events(struct op_counter_config *ctr,
682*4882a593Smuzhiyun 			struct op_system_config *sys, int num_ctrs)
683*4882a593Smuzhiyun {
684*4882a593Smuzhiyun 	int i;
685*4882a593Smuzhiyun 
686*4882a593Smuzhiyun 	/* routine is called once for all nodes */
687*4882a593Smuzhiyun 
688*4882a593Smuzhiyun 	spu_evnt_phys_spu_indx = 0;
689*4882a593Smuzhiyun 	/*
690*4882a593Smuzhiyun 	 * For all events except PPU CYCLEs, each node will need to make
691*4882a593Smuzhiyun 	 * the rtas cbe-perftools call to setup and reset the debug bus.
692*4882a593Smuzhiyun 	 * Make the token lookup call once and store it in the global
693*4882a593Smuzhiyun 	 * variable pm_rtas_token.
694*4882a593Smuzhiyun 	 */
695*4882a593Smuzhiyun 	pm_rtas_token = rtas_token("ibm,cbe-perftools");
696*4882a593Smuzhiyun 
697*4882a593Smuzhiyun 	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
698*4882a593Smuzhiyun 		printk(KERN_ERR
699*4882a593Smuzhiyun 		       "%s: rtas token ibm,cbe-perftools unknown\n",
700*4882a593Smuzhiyun 		       __func__);
701*4882a593Smuzhiyun 		return -EIO;
702*4882a593Smuzhiyun 	}
703*4882a593Smuzhiyun 
704*4882a593Smuzhiyun 	/* setup the pm_control register settings,
705*4882a593Smuzhiyun 	 * settings will be written per node by the
706*4882a593Smuzhiyun 	 * cell_cpu_setup() function.
707*4882a593Smuzhiyun 	 */
708*4882a593Smuzhiyun 	pm_regs.pm_cntrl.trace_buf_ovflw = 1;
709*4882a593Smuzhiyun 
710*4882a593Smuzhiyun 	/* Use the occurrence trace mode to have SPU PC saved
711*4882a593Smuzhiyun 	 * to the trace buffer.  Occurrence data in trace buffer
712*4882a593Smuzhiyun 	 * is not used.  Bit 2 must be set to store SPU addresses.
713*4882a593Smuzhiyun 	 */
714*4882a593Smuzhiyun 	pm_regs.pm_cntrl.trace_mode = 2;
715*4882a593Smuzhiyun 
716*4882a593Smuzhiyun 	pm_regs.pm_cntrl.spu_addr_trace = 0x1;  /* using debug bus
717*4882a593Smuzhiyun 						   event 2 & 3 */
718*4882a593Smuzhiyun 
719*4882a593Smuzhiyun 	/* setup the debug bus event array with the SPU PC routing events.
720*4882a593Smuzhiyun 	*  Note, pm_signal[0] will be filled in by set_pm_event() call below.
721*4882a593Smuzhiyun 	*/
722*4882a593Smuzhiyun 	pm_signal[1].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
723*4882a593Smuzhiyun 	pm_signal[1].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_A);
724*4882a593Smuzhiyun 	pm_signal[1].bit = SPU_PROFILE_EVENT_ADDR % 100;
725*4882a593Smuzhiyun 	pm_signal[1].sub_unit = spu_evnt_phys_spu_indx;
726*4882a593Smuzhiyun 
727*4882a593Smuzhiyun 	pm_signal[2].signal_group = SPU_PROFILE_EVENT_ADDR / 100;
728*4882a593Smuzhiyun 	pm_signal[2].bus_word = GET_BUS_WORD(SPU_PROFILE_EVENT_ADDR_MASK_B);
729*4882a593Smuzhiyun 	pm_signal[2].bit = SPU_PROFILE_EVENT_ADDR % 100;
730*4882a593Smuzhiyun 	pm_signal[2].sub_unit = spu_evnt_phys_spu_indx;
731*4882a593Smuzhiyun 
732*4882a593Smuzhiyun 	/* Set the user selected spu event to profile on,
733*4882a593Smuzhiyun 	 * note, only one SPU profiling event is supported
734*4882a593Smuzhiyun 	 */
735*4882a593Smuzhiyun 	num_counters = 1;  /* Only support one SPU event at a time */
736*4882a593Smuzhiyun 	set_pm_event(0, ctr[0].event, ctr[0].unit_mask);
737*4882a593Smuzhiyun 
738*4882a593Smuzhiyun 	reset_value[0] = 0xFFFFFFFF - ctr[0].count;
739*4882a593Smuzhiyun 
740*4882a593Smuzhiyun 	/* global, used by cell_cpu_setup */
741*4882a593Smuzhiyun 	ctr_enabled |= 1;
742*4882a593Smuzhiyun 
743*4882a593Smuzhiyun 	/* Initialize the count for each SPU to the reset value */
744*4882a593Smuzhiyun 	for (i=0; i < MAX_NUMNODES * NUM_SPUS_PER_NODE; i++)
745*4882a593Smuzhiyun 		spu_pm_cnt[i] = reset_value[0];
746*4882a593Smuzhiyun 
747*4882a593Smuzhiyun 	return 0;
748*4882a593Smuzhiyun }
749*4882a593Smuzhiyun 
750*4882a593Smuzhiyun static int cell_reg_setup_ppu(struct op_counter_config *ctr,
751*4882a593Smuzhiyun 			struct op_system_config *sys, int num_ctrs)
752*4882a593Smuzhiyun {
753*4882a593Smuzhiyun 	/* routine is called once for all nodes */
754*4882a593Smuzhiyun 	int i, j, cpu;
755*4882a593Smuzhiyun 
756*4882a593Smuzhiyun 	num_counters = num_ctrs;
757*4882a593Smuzhiyun 
758*4882a593Smuzhiyun 	if (unlikely(num_ctrs > NR_PHYS_CTRS)) {
759*4882a593Smuzhiyun 		printk(KERN_ERR
760*4882a593Smuzhiyun 		       "%s: Oprofile, number of specified events " \
761*4882a593Smuzhiyun 		       "exceeds number of physical counters\n",
762*4882a593Smuzhiyun 		       __func__);
763*4882a593Smuzhiyun 		return -EIO;
764*4882a593Smuzhiyun 	}
765*4882a593Smuzhiyun 
766*4882a593Smuzhiyun 	set_count_mode(sys->enable_kernel, sys->enable_user);
767*4882a593Smuzhiyun 
768*4882a593Smuzhiyun 	/* Setup the thread 0 events */
769*4882a593Smuzhiyun 	for (i = 0; i < num_ctrs; ++i) {
770*4882a593Smuzhiyun 
771*4882a593Smuzhiyun 		pmc_cntrl[0][i].evnts = ctr[i].event;
772*4882a593Smuzhiyun 		pmc_cntrl[0][i].masks = ctr[i].unit_mask;
773*4882a593Smuzhiyun 		pmc_cntrl[0][i].enabled = ctr[i].enabled;
774*4882a593Smuzhiyun 		pmc_cntrl[0][i].vcntr = i;
775*4882a593Smuzhiyun 
776*4882a593Smuzhiyun 		for_each_possible_cpu(j)
777*4882a593Smuzhiyun 			per_cpu(pmc_values, j)[i] = 0;
778*4882a593Smuzhiyun 	}
779*4882a593Smuzhiyun 
780*4882a593Smuzhiyun 	/*
781*4882a593Smuzhiyun 	 * Setup the thread 1 events, map the thread 0 event to the
782*4882a593Smuzhiyun 	 * equivalent thread 1 event.
783*4882a593Smuzhiyun 	 */
784*4882a593Smuzhiyun 	for (i = 0; i < num_ctrs; ++i) {
785*4882a593Smuzhiyun 		if ((ctr[i].event >= 2100) && (ctr[i].event <= 2111))
786*4882a593Smuzhiyun 			pmc_cntrl[1][i].evnts = ctr[i].event + 19;
787*4882a593Smuzhiyun 		else if (ctr[i].event == 2203)
788*4882a593Smuzhiyun 			pmc_cntrl[1][i].evnts = ctr[i].event;
789*4882a593Smuzhiyun 		else if ((ctr[i].event >= 2200) && (ctr[i].event <= 2215))
790*4882a593Smuzhiyun 			pmc_cntrl[1][i].evnts = ctr[i].event + 16;
791*4882a593Smuzhiyun 		else
792*4882a593Smuzhiyun 			pmc_cntrl[1][i].evnts = ctr[i].event;
793*4882a593Smuzhiyun 
794*4882a593Smuzhiyun 		pmc_cntrl[1][i].masks = ctr[i].unit_mask;
795*4882a593Smuzhiyun 		pmc_cntrl[1][i].enabled = ctr[i].enabled;
796*4882a593Smuzhiyun 		pmc_cntrl[1][i].vcntr = i;
797*4882a593Smuzhiyun 	}
798*4882a593Smuzhiyun 
799*4882a593Smuzhiyun 	for (i = 0; i < NUM_INPUT_BUS_WORDS; i++)
800*4882a593Smuzhiyun 		input_bus[i] = 0xff;
801*4882a593Smuzhiyun 
802*4882a593Smuzhiyun 	/*
803*4882a593Smuzhiyun 	 * Our counters count up, and "count" refers to
804*4882a593Smuzhiyun 	 * how much before the next interrupt, and we interrupt
805*4882a593Smuzhiyun 	 * on overflow.	 So we calculate the starting value
806*4882a593Smuzhiyun 	 * which will give us "count" until overflow.
807*4882a593Smuzhiyun 	 * Then we set the events on the enabled counters.
808*4882a593Smuzhiyun 	 */
809*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
810*4882a593Smuzhiyun 		/* start with virtual counter set 0 */
811*4882a593Smuzhiyun 		if (pmc_cntrl[0][i].enabled) {
812*4882a593Smuzhiyun 			/* Using 32bit counters, reset max - count */
813*4882a593Smuzhiyun 			reset_value[i] = 0xFFFFFFFF - ctr[i].count;
814*4882a593Smuzhiyun 			set_pm_event(i,
815*4882a593Smuzhiyun 				     pmc_cntrl[0][i].evnts,
816*4882a593Smuzhiyun 				     pmc_cntrl[0][i].masks);
817*4882a593Smuzhiyun 
818*4882a593Smuzhiyun 			/* global, used by cell_cpu_setup */
819*4882a593Smuzhiyun 			ctr_enabled |= (1 << i);
820*4882a593Smuzhiyun 		}
821*4882a593Smuzhiyun 	}
822*4882a593Smuzhiyun 
823*4882a593Smuzhiyun 	/* initialize the previous counts for the virtual cntrs */
824*4882a593Smuzhiyun 	for_each_online_cpu(cpu)
825*4882a593Smuzhiyun 		for (i = 0; i < num_counters; ++i) {
826*4882a593Smuzhiyun 			per_cpu(pmc_values, cpu)[i] = reset_value[i];
827*4882a593Smuzhiyun 		}
828*4882a593Smuzhiyun 
829*4882a593Smuzhiyun 	return 0;
830*4882a593Smuzhiyun }
831*4882a593Smuzhiyun 
832*4882a593Smuzhiyun 
833*4882a593Smuzhiyun /* This function is called once for all cpus combined */
834*4882a593Smuzhiyun static int cell_reg_setup(struct op_counter_config *ctr,
835*4882a593Smuzhiyun 			struct op_system_config *sys, int num_ctrs)
836*4882a593Smuzhiyun {
837*4882a593Smuzhiyun 	int ret=0;
838*4882a593Smuzhiyun 	spu_cycle_reset = 0;
839*4882a593Smuzhiyun 
840*4882a593Smuzhiyun 	/* initialize the spu_addr_trace value, will be reset if
841*4882a593Smuzhiyun 	 * doing spu event profiling.
842*4882a593Smuzhiyun 	 */
843*4882a593Smuzhiyun 	pm_regs.group_control = 0;
844*4882a593Smuzhiyun 	pm_regs.debug_bus_control = 0;
845*4882a593Smuzhiyun 	pm_regs.pm_cntrl.stop_at_max = 1;
846*4882a593Smuzhiyun 	pm_regs.pm_cntrl.trace_mode = 0;
847*4882a593Smuzhiyun 	pm_regs.pm_cntrl.freeze = 1;
848*4882a593Smuzhiyun 	pm_regs.pm_cntrl.trace_buf_ovflw = 0;
849*4882a593Smuzhiyun 	pm_regs.pm_cntrl.spu_addr_trace = 0;
850*4882a593Smuzhiyun 
851*4882a593Smuzhiyun 	/*
852*4882a593Smuzhiyun 	 * For all events except PPU CYCLEs, each node will need to make
853*4882a593Smuzhiyun 	 * the rtas cbe-perftools call to setup and reset the debug bus.
854*4882a593Smuzhiyun 	 * Make the token lookup call once and store it in the global
855*4882a593Smuzhiyun 	 * variable pm_rtas_token.
856*4882a593Smuzhiyun 	 */
857*4882a593Smuzhiyun 	pm_rtas_token = rtas_token("ibm,cbe-perftools");
858*4882a593Smuzhiyun 
859*4882a593Smuzhiyun 	if (unlikely(pm_rtas_token == RTAS_UNKNOWN_SERVICE)) {
860*4882a593Smuzhiyun 		printk(KERN_ERR
861*4882a593Smuzhiyun 		       "%s: rtas token ibm,cbe-perftools unknown\n",
862*4882a593Smuzhiyun 		       __func__);
863*4882a593Smuzhiyun 		return -EIO;
864*4882a593Smuzhiyun 	}
865*4882a593Smuzhiyun 
866*4882a593Smuzhiyun 	if (ctr[0].event == SPU_CYCLES_EVENT_NUM) {
867*4882a593Smuzhiyun 		profiling_mode = SPU_PROFILING_CYCLES;
868*4882a593Smuzhiyun 		ret = cell_reg_setup_spu_cycles(ctr, sys, num_ctrs);
869*4882a593Smuzhiyun 	} else if ((ctr[0].event >= SPU_EVENT_NUM_START) &&
870*4882a593Smuzhiyun 		   (ctr[0].event <= SPU_EVENT_NUM_STOP)) {
871*4882a593Smuzhiyun 		profiling_mode = SPU_PROFILING_EVENTS;
872*4882a593Smuzhiyun 		spu_cycle_reset = ctr[0].count;
873*4882a593Smuzhiyun 
874*4882a593Smuzhiyun 		/* for SPU event profiling, need to setup the
875*4882a593Smuzhiyun 		 * pm_signal array with the events to route the
876*4882a593Smuzhiyun 		 * SPU PC before making the FW call.  Note, only
877*4882a593Smuzhiyun 		 * one SPU event for profiling can be specified
878*4882a593Smuzhiyun 		 * at a time.
879*4882a593Smuzhiyun 		 */
880*4882a593Smuzhiyun 		cell_reg_setup_spu_events(ctr, sys, num_ctrs);
881*4882a593Smuzhiyun 	} else {
882*4882a593Smuzhiyun 		profiling_mode = PPU_PROFILING;
883*4882a593Smuzhiyun 		ret = cell_reg_setup_ppu(ctr, sys, num_ctrs);
884*4882a593Smuzhiyun 	}
885*4882a593Smuzhiyun 
886*4882a593Smuzhiyun 	return ret;
887*4882a593Smuzhiyun }
888*4882a593Smuzhiyun 
889*4882a593Smuzhiyun 
890*4882a593Smuzhiyun 
891*4882a593Smuzhiyun /* This function is called once for each cpu */
892*4882a593Smuzhiyun static int cell_cpu_setup(struct op_counter_config *cntr)
893*4882a593Smuzhiyun {
894*4882a593Smuzhiyun 	u32 cpu = smp_processor_id();
895*4882a593Smuzhiyun 	u32 num_enabled = 0;
896*4882a593Smuzhiyun 	int i;
897*4882a593Smuzhiyun 	int ret;
898*4882a593Smuzhiyun 
899*4882a593Smuzhiyun 	/* Cycle based SPU profiling does not use the performance
900*4882a593Smuzhiyun 	 * counters.  The trace array is configured to collect
901*4882a593Smuzhiyun 	 * the data.
902*4882a593Smuzhiyun 	 */
903*4882a593Smuzhiyun 	if (profiling_mode == SPU_PROFILING_CYCLES)
904*4882a593Smuzhiyun 		return 0;
905*4882a593Smuzhiyun 
906*4882a593Smuzhiyun 	/* There is one performance monitor per processor chip (i.e. node),
907*4882a593Smuzhiyun 	 * so we only need to perform this function once per node.
908*4882a593Smuzhiyun 	 */
909*4882a593Smuzhiyun 	if (cbe_get_hw_thread_id(cpu))
910*4882a593Smuzhiyun 		return 0;
911*4882a593Smuzhiyun 
912*4882a593Smuzhiyun 	/* Stop all counters */
913*4882a593Smuzhiyun 	cbe_disable_pm(cpu);
914*4882a593Smuzhiyun 	cbe_disable_pm_interrupts(cpu);
915*4882a593Smuzhiyun 
916*4882a593Smuzhiyun 	cbe_write_pm(cpu, pm_start_stop, 0);
917*4882a593Smuzhiyun 	cbe_write_pm(cpu, group_control, pm_regs.group_control);
918*4882a593Smuzhiyun 	cbe_write_pm(cpu, debug_bus_control, pm_regs.debug_bus_control);
919*4882a593Smuzhiyun 	write_pm_cntrl(cpu);
920*4882a593Smuzhiyun 
921*4882a593Smuzhiyun 	for (i = 0; i < num_counters; ++i) {
922*4882a593Smuzhiyun 		if (ctr_enabled & (1 << i)) {
923*4882a593Smuzhiyun 			pm_signal[num_enabled].cpu = cbe_cpu_to_node(cpu);
924*4882a593Smuzhiyun 			num_enabled++;
925*4882a593Smuzhiyun 		}
926*4882a593Smuzhiyun 	}
927*4882a593Smuzhiyun 
928*4882a593Smuzhiyun 	/*
929*4882a593Smuzhiyun 	 * The pm_rtas_activate_signals will return -EIO if the FW
930*4882a593Smuzhiyun 	 * call failed.
931*4882a593Smuzhiyun 	 */
932*4882a593Smuzhiyun 	if (profiling_mode == SPU_PROFILING_EVENTS) {
933*4882a593Smuzhiyun 		/* For SPU event profiling also need to setup the
934*4882a593Smuzhiyun 		 * pm interval timer
935*4882a593Smuzhiyun 		 */
936*4882a593Smuzhiyun 		ret = pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
937*4882a593Smuzhiyun 					       num_enabled+2);
938*4882a593Smuzhiyun 		/* store PC from debug bus to Trace buffer as often
939*4882a593Smuzhiyun 		 * as possible (every 10 cycles)
940*4882a593Smuzhiyun 		 */
941*4882a593Smuzhiyun 		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
942*4882a593Smuzhiyun 		return ret;
943*4882a593Smuzhiyun 	} else
944*4882a593Smuzhiyun 		return pm_rtas_activate_signals(cbe_cpu_to_node(cpu),
945*4882a593Smuzhiyun 						num_enabled);
946*4882a593Smuzhiyun }
947*4882a593Smuzhiyun 
948*4882a593Smuzhiyun #define ENTRIES	 303
949*4882a593Smuzhiyun #define MAXLFSR	 0xFFFFFF
950*4882a593Smuzhiyun 
951*4882a593Smuzhiyun /* precomputed table of 24 bit LFSR values */
952*4882a593Smuzhiyun static int initial_lfsr[] = {
953*4882a593Smuzhiyun  8221349, 12579195, 5379618, 10097839, 7512963, 7519310, 3955098, 10753424,
954*4882a593Smuzhiyun  15507573, 7458917, 285419, 2641121, 9780088, 3915503, 6668768, 1548716,
955*4882a593Smuzhiyun  4885000, 8774424, 9650099, 2044357, 2304411, 9326253, 10332526, 4421547,
956*4882a593Smuzhiyun  3440748, 10179459, 13332843, 10375561, 1313462, 8375100, 5198480, 6071392,
957*4882a593Smuzhiyun  9341783, 1526887, 3985002, 1439429, 13923762, 7010104, 11969769, 4547026,
958*4882a593Smuzhiyun  2040072, 4025602, 3437678, 7939992, 11444177, 4496094, 9803157, 10745556,
959*4882a593Smuzhiyun  3671780, 4257846, 5662259, 13196905, 3237343, 12077182, 16222879, 7587769,
960*4882a593Smuzhiyun  14706824, 2184640, 12591135, 10420257, 7406075, 3648978, 11042541, 15906893,
961*4882a593Smuzhiyun  11914928, 4732944, 10695697, 12928164, 11980531, 4430912, 11939291, 2917017,
962*4882a593Smuzhiyun  6119256, 4172004, 9373765, 8410071, 14788383, 5047459, 5474428, 1737756,
963*4882a593Smuzhiyun  15967514, 13351758, 6691285, 8034329, 2856544, 14394753, 11310160, 12149558,
964*4882a593Smuzhiyun  7487528, 7542781, 15668898, 12525138, 12790975, 3707933, 9106617, 1965401,
965*4882a593Smuzhiyun  16219109, 12801644, 2443203, 4909502, 8762329, 3120803, 6360315, 9309720,
966*4882a593Smuzhiyun  15164599, 10844842, 4456529, 6667610, 14924259, 884312, 6234963, 3326042,
967*4882a593Smuzhiyun  15973422, 13919464, 5272099, 6414643, 3909029, 2764324, 5237926, 4774955,
968*4882a593Smuzhiyun  10445906, 4955302, 5203726, 10798229, 11443419, 2303395, 333836, 9646934,
969*4882a593Smuzhiyun  3464726, 4159182, 568492, 995747, 10318756, 13299332, 4836017, 8237783,
970*4882a593Smuzhiyun  3878992, 2581665, 11394667, 5672745, 14412947, 3159169, 9094251, 16467278,
971*4882a593Smuzhiyun  8671392, 15230076, 4843545, 7009238, 15504095, 1494895, 9627886, 14485051,
972*4882a593Smuzhiyun  8304291, 252817, 12421642, 16085736, 4774072, 2456177, 4160695, 15409741,
973*4882a593Smuzhiyun  4902868, 5793091, 13162925, 16039714, 782255, 11347835, 14884586, 366972,
974*4882a593Smuzhiyun  16308990, 11913488, 13390465, 2958444, 10340278, 1177858, 1319431, 10426302,
975*4882a593Smuzhiyun  2868597, 126119, 5784857, 5245324, 10903900, 16436004, 3389013, 1742384,
976*4882a593Smuzhiyun  14674502, 10279218, 8536112, 10364279, 6877778, 14051163, 1025130, 6072469,
977*4882a593Smuzhiyun  1988305, 8354440, 8216060, 16342977, 13112639, 3976679, 5913576, 8816697,
978*4882a593Smuzhiyun  6879995, 14043764, 3339515, 9364420, 15808858, 12261651, 2141560, 5636398,
979*4882a593Smuzhiyun  10345425, 10414756, 781725, 6155650, 4746914, 5078683, 7469001, 6799140,
980*4882a593Smuzhiyun  10156444, 9667150, 10116470, 4133858, 2121972, 1124204, 1003577, 1611214,
981*4882a593Smuzhiyun  14304602, 16221850, 13878465, 13577744, 3629235, 8772583, 10881308, 2410386,
982*4882a593Smuzhiyun  7300044, 5378855, 9301235, 12755149, 4977682, 8083074, 10327581, 6395087,
983*4882a593Smuzhiyun  9155434, 15501696, 7514362, 14520507, 15808945, 3244584, 4741962, 9658130,
984*4882a593Smuzhiyun  14336147, 8654727, 7969093, 15759799, 14029445, 5038459, 9894848, 8659300,
985*4882a593Smuzhiyun  13699287, 8834306, 10712885, 14753895, 10410465, 3373251, 309501, 9561475,
986*4882a593Smuzhiyun  5526688, 14647426, 14209836, 5339224, 207299, 14069911, 8722990, 2290950,
987*4882a593Smuzhiyun  3258216, 12505185, 6007317, 9218111, 14661019, 10537428, 11731949, 9027003,
988*4882a593Smuzhiyun  6641507, 9490160, 200241, 9720425, 16277895, 10816638, 1554761, 10431375,
989*4882a593Smuzhiyun  7467528, 6790302, 3429078, 14633753, 14428997, 11463204, 3576212, 2003426,
990*4882a593Smuzhiyun  6123687, 820520, 9992513, 15784513, 5778891, 6428165, 8388607
991*4882a593Smuzhiyun };
992*4882a593Smuzhiyun 
993*4882a593Smuzhiyun /*
994*4882a593Smuzhiyun  * The hardware uses an LFSR counting sequence to determine when to capture
995*4882a593Smuzhiyun  * the SPU PCs.	 An LFSR sequence is like a pseudo-random number sequence
996*4882a593Smuzhiyun  * where each number occurs once in the sequence but the sequence is not in
997*4882a593Smuzhiyun  * numerical order. The SPU PC capture is done when the LFSR sequence reaches
998*4882a593Smuzhiyun  * the last value in the sequence.  Hence the user specified value N
999*4882a593Smuzhiyun  * corresponds to the LFSR number that is N from the end of the sequence.
1000*4882a593Smuzhiyun  *
1001*4882a593Smuzhiyun  * To avoid the time to compute the LFSR, a lookup table is used.  The 24 bit
1002*4882a593Smuzhiyun  * LFSR sequence is broken into four ranges.  The spacing of the precomputed
1003*4882a593Smuzhiyun  * values is adjusted in each range so the error between the user specified
1004*4882a593Smuzhiyun  * number (N) of events between samples and the actual number of events based
1005*4882a593Smuzhiyun  * on the precomputed value will be less than about 6.2%.  Note, if the user
1006*4882a593Smuzhiyun  * specifies N < 2^16, the LFSR value that is 2^16 from the end will be used.
1007*4882a593Smuzhiyun  * This is to prevent the loss of samples because the trace buffer is full.
1008*4882a593Smuzhiyun  *
1009*4882a593Smuzhiyun  *	   User specified N		     Step between	   Index in
1010*4882a593Smuzhiyun  *					 precomputed values	 precomputed
1011*4882a593Smuzhiyun  *								    table
1012*4882a593Smuzhiyun  * 0		    to	2^16-1			----		      0
1013*4882a593Smuzhiyun  * 2^16	    to	2^16+2^19-1		2^12		    1 to 128
1014*4882a593Smuzhiyun  * 2^16+2^19	    to	2^16+2^19+2^22-1	2^15		  129 to 256
1015*4882a593Smuzhiyun  * 2^16+2^19+2^22  to	2^24-1			2^18		  257 to 302
1016*4882a593Smuzhiyun  *
1017*4882a593Smuzhiyun  *
1018*4882a593Smuzhiyun  * For example, the LFSR values in the second range are computed for 2^16,
1019*4882a593Smuzhiyun  * 2^16+2^12, ... , 2^19-2^16, 2^19 and stored in the table at indices
1020*4882a593Smuzhiyun  * 1, 2,..., 127, 128.
1021*4882a593Smuzhiyun  *
1022*4882a593Smuzhiyun  * The 24 bit LFSR value for the nth number in the sequence can be
1023*4882a593Smuzhiyun  * calculated using the following code:
1024*4882a593Smuzhiyun  *
1025*4882a593Smuzhiyun  * #define size 24
1026*4882a593Smuzhiyun  * int calculate_lfsr(int n)
1027*4882a593Smuzhiyun  * {
1028*4882a593Smuzhiyun  *	int i;
1029*4882a593Smuzhiyun  *	unsigned int newlfsr0;
1030*4882a593Smuzhiyun  *	unsigned int lfsr = 0xFFFFFF;
1031*4882a593Smuzhiyun  *	unsigned int howmany = n;
1032*4882a593Smuzhiyun  *
1033*4882a593Smuzhiyun  *	for (i = 2; i < howmany + 2; i++) {
1034*4882a593Smuzhiyun  *		newlfsr0 = (((lfsr >> (size - 1 - 0)) & 1) ^
1035*4882a593Smuzhiyun  *		((lfsr >> (size - 1 - 1)) & 1) ^
1036*4882a593Smuzhiyun  *		(((lfsr >> (size - 1 - 6)) & 1) ^
1037*4882a593Smuzhiyun  *		((lfsr >> (size - 1 - 23)) & 1)));
1038*4882a593Smuzhiyun  *
1039*4882a593Smuzhiyun  *		lfsr >>= 1;
1040*4882a593Smuzhiyun  *		lfsr = lfsr | (newlfsr0 << (size - 1));
1041*4882a593Smuzhiyun  *	}
1042*4882a593Smuzhiyun  *	return lfsr;
1043*4882a593Smuzhiyun  * }
1044*4882a593Smuzhiyun  */
1045*4882a593Smuzhiyun 
1046*4882a593Smuzhiyun #define V2_16  (0x1 << 16)
1047*4882a593Smuzhiyun #define V2_19  (0x1 << 19)
1048*4882a593Smuzhiyun #define V2_22  (0x1 << 22)
1049*4882a593Smuzhiyun 
1050*4882a593Smuzhiyun static int calculate_lfsr(int n)
1051*4882a593Smuzhiyun {
1052*4882a593Smuzhiyun 	/*
1053*4882a593Smuzhiyun 	 * The ranges and steps are in powers of 2 so the calculations
1054*4882a593Smuzhiyun 	 * can be done using shifts rather than divides.
1055*4882a593Smuzhiyun 	 */
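	/*
	 * For example, n = 2^16 + 2^13 falls in the second range below,
	 * giving index = ((n - 2^16) >> 12) + 1 = 3.
	 */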
1056*4882a593Smuzhiyun 	int index;
1057*4882a593Smuzhiyun 
1058*4882a593Smuzhiyun 	if ((n >> 16) == 0)
1059*4882a593Smuzhiyun 		index = 0;
1060*4882a593Smuzhiyun 	else if (((n - V2_16) >> 19) == 0)
1061*4882a593Smuzhiyun 		index = ((n - V2_16) >> 12) + 1;
1062*4882a593Smuzhiyun 	else if (((n - V2_16 - V2_19) >> 22) == 0)
1063*4882a593Smuzhiyun 		index = ((n - V2_16 - V2_19) >> 15 ) + 1 + 128;
1064*4882a593Smuzhiyun 	else if (((n - V2_16 - V2_19 - V2_22) >> 24) == 0)
1065*4882a593Smuzhiyun 		index = ((n - V2_16 - V2_19 - V2_22) >> 18 ) + 1 + 256;
1066*4882a593Smuzhiyun 	else
1067*4882a593Smuzhiyun 		index = ENTRIES-1;
1068*4882a593Smuzhiyun 
1069*4882a593Smuzhiyun 	/* make sure index is valid */
1070*4882a593Smuzhiyun 	if ((index >= ENTRIES) || (index < 0))
1071*4882a593Smuzhiyun 		index = ENTRIES-1;
1072*4882a593Smuzhiyun 
1073*4882a593Smuzhiyun 	return initial_lfsr[index];
1074*4882a593Smuzhiyun }
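/*
 * Illustrative example (not part of the original driver): for a user
 * specified N = 200000, the index math above picks the second range,
 * since 200000 - 2^16 = 134464 and 134464 >> 19 == 0.  The table index
 * is then (134464 >> 12) + 1 = 32 + 1 = 33, so initial_lfsr[33] supplies
 * the precomputed 24-bit LFSR value used for roughly 200000 events
 * between samples (within the ~6.2% error described above).
 */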
1075*4882a593Smuzhiyun 
1076*4882a593Smuzhiyun static int pm_rtas_activate_spu_profiling(u32 node)
1077*4882a593Smuzhiyun {
1078*4882a593Smuzhiyun 	int ret, i;
1079*4882a593Smuzhiyun 	struct pm_signal pm_signal_local[NUM_SPUS_PER_NODE];
1080*4882a593Smuzhiyun 
1081*4882a593Smuzhiyun 	/*
1082*4882a593Smuzhiyun 	 * Set up the rtas call to configure the debug bus to
1083*4882a593Smuzhiyun 	 * route the SPU PCs.  Set up the pm_signal for each SPU.
1084*4882a593Smuzhiyun 	 */
1085*4882a593Smuzhiyun 	for (i = 0; i < ARRAY_SIZE(pm_signal_local); i++) {
1086*4882a593Smuzhiyun 		pm_signal_local[i].cpu = node;
1087*4882a593Smuzhiyun 		pm_signal_local[i].signal_group = 41;
1088*4882a593Smuzhiyun 		/* spu i on word (i/2) */
1089*4882a593Smuzhiyun 		pm_signal_local[i].bus_word = 1 << (i / 2);
1090*4882a593Smuzhiyun 		/* spu i */
1091*4882a593Smuzhiyun 		pm_signal_local[i].sub_unit = i;
1092*4882a593Smuzhiyun 		pm_signal_local[i].bit = 63;
1093*4882a593Smuzhiyun 	}
1094*4882a593Smuzhiyun 
1095*4882a593Smuzhiyun 	ret = rtas_ibm_cbe_perftools(SUBFUNC_ACTIVATE,
1096*4882a593Smuzhiyun 				     PASSTHRU_ENABLE, pm_signal_local,
1097*4882a593Smuzhiyun 				     (ARRAY_SIZE(pm_signal_local)
1098*4882a593Smuzhiyun 				      * sizeof(struct pm_signal)));
1099*4882a593Smuzhiyun 
1100*4882a593Smuzhiyun 	if (unlikely(ret)) {
1101*4882a593Smuzhiyun 		printk(KERN_WARNING "%s: rtas returned: %d\n",
1102*4882a593Smuzhiyun 		       __func__, ret);
1103*4882a593Smuzhiyun 		return -EIO;
1104*4882a593Smuzhiyun 	}
1105*4882a593Smuzhiyun 
1106*4882a593Smuzhiyun 	return 0;
1107*4882a593Smuzhiyun }
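/*
 * For illustration only (values derived from the loop above, not read back
 * from hardware): on node 0 the pm_signal entry built for SPU 5 would be
 *
 *	pm_signal_local[5].cpu          = 0;
 *	pm_signal_local[5].signal_group = 41;
 *	pm_signal_local[5].bus_word     = 1 << (5 / 2);   (= 0x4, debug bus word 2)
 *	pm_signal_local[5].sub_unit     = 5;
 *	pm_signal_local[5].bit          = 63;
 *
 * so SPUs 0/1 are routed on debug bus word 0, SPUs 2/3 on word 1,
 * SPUs 4/5 on word 2 and SPUs 6/7 on word 3.
 */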
1108*4882a593Smuzhiyun 
1109*4882a593Smuzhiyun #ifdef CONFIG_CPU_FREQ
1110*4882a593Smuzhiyun static int
1111*4882a593Smuzhiyun oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
1112*4882a593Smuzhiyun {
1113*4882a593Smuzhiyun 	int ret = 0;
1114*4882a593Smuzhiyun 	struct cpufreq_freqs *frq = data;
1115*4882a593Smuzhiyun 	if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
1116*4882a593Smuzhiyun 	    (val == CPUFREQ_POSTCHANGE && frq->old > frq->new))
1117*4882a593Smuzhiyun 		set_spu_profiling_frequency(frq->new, spu_cycle_reset);
1118*4882a593Smuzhiyun 	return ret;
1119*4882a593Smuzhiyun }
1120*4882a593Smuzhiyun 
1121*4882a593Smuzhiyun static struct notifier_block cpu_freq_notifier_block = {
1122*4882a593Smuzhiyun 	.notifier_call	= oprof_cpufreq_notify
1123*4882a593Smuzhiyun };
1124*4882a593Smuzhiyun #endif
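/*
 * Illustrative example of the notifier above (frequencies are made up):
 * on a 2.8 GHz -> 3.2 GHz transition, frq->old < frq->new, so the SPU
 * profiling frequency is updated at CPUFREQ_PRECHANGE; on a
 * 3.2 GHz -> 2.8 GHz transition it is updated at CPUFREQ_POSTCHANGE.
 * In both cases set_spu_profiling_frequency() ends up being called with
 * the new frequency and the current spu_cycle_reset value.
 */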
1125*4882a593Smuzhiyun 
1126*4882a593Smuzhiyun /*
1127*4882a593Smuzhiyun  * Note the generic OProfile stop calls do not support returning
1128*4882a593Smuzhiyun  * an error on stop.  Hence, we will not return an error if the FW
1129*4882a593Smuzhiyun  * calls fail on stop.  Failure to reset the debug bus is not an issue.
1130*4882a593Smuzhiyun  * Failure to disable the SPU profiling is not an issue.  The FW calls
1131*4882a593Smuzhiyun  * to enable the performance counters and debug bus will work even if
1132*4882a593Smuzhiyun  * the hardware was not cleanly reset.
1133*4882a593Smuzhiyun  */
1134*4882a593Smuzhiyun static void cell_global_stop_spu_cycles(void)
1135*4882a593Smuzhiyun {
1136*4882a593Smuzhiyun 	int subfunc, rtn_value;
1137*4882a593Smuzhiyun 	unsigned int lfsr_value;
1138*4882a593Smuzhiyun 	int cpu;
1139*4882a593Smuzhiyun 
1140*4882a593Smuzhiyun 	oprofile_running = 0;
1141*4882a593Smuzhiyun 	smp_wmb();
1142*4882a593Smuzhiyun 
1143*4882a593Smuzhiyun #ifdef CONFIG_CPU_FREQ
1144*4882a593Smuzhiyun 	cpufreq_unregister_notifier(&cpu_freq_notifier_block,
1145*4882a593Smuzhiyun 				    CPUFREQ_TRANSITION_NOTIFIER);
1146*4882a593Smuzhiyun #endif
1147*4882a593Smuzhiyun 
1148*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1149*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1150*4882a593Smuzhiyun 			continue;
1151*4882a593Smuzhiyun 
1152*4882a593Smuzhiyun 		subfunc = 3;	/*
1153*4882a593Smuzhiyun 				 * 2 - activate SPU tracing,
1154*4882a593Smuzhiyun 				 * 3 - deactivate
1155*4882a593Smuzhiyun 				 */
1156*4882a593Smuzhiyun 		lfsr_value = 0x8f100000;
1157*4882a593Smuzhiyun 
1158*4882a593Smuzhiyun 		rtn_value = rtas_call(spu_rtas_token, 3, 1, NULL,
1159*4882a593Smuzhiyun 				      subfunc, cbe_cpu_to_node(cpu),
1160*4882a593Smuzhiyun 				      lfsr_value);
1161*4882a593Smuzhiyun 
1162*4882a593Smuzhiyun 		if (unlikely(rtn_value != 0)) {
1163*4882a593Smuzhiyun 			printk(KERN_ERR
1164*4882a593Smuzhiyun 			       "%s: rtas call ibm,cbe-spu-perftools " \
1165*4882a593Smuzhiyun 			       "failed, return = %d\n",
1166*4882a593Smuzhiyun 			       __func__, rtn_value);
1167*4882a593Smuzhiyun 		}
1168*4882a593Smuzhiyun 
1169*4882a593Smuzhiyun 		/* Deactivate the signals */
1170*4882a593Smuzhiyun 		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1171*4882a593Smuzhiyun 	}
1172*4882a593Smuzhiyun 
1173*4882a593Smuzhiyun 	stop_spu_profiling_cycles();
1174*4882a593Smuzhiyun }
1175*4882a593Smuzhiyun 
1176*4882a593Smuzhiyun static void cell_global_stop_spu_events(void)
1177*4882a593Smuzhiyun {
1178*4882a593Smuzhiyun 	int cpu;
1179*4882a593Smuzhiyun 	oprofile_running = 0;
1180*4882a593Smuzhiyun 
1181*4882a593Smuzhiyun 	stop_spu_profiling_events();
1182*4882a593Smuzhiyun 	smp_wmb();
1183*4882a593Smuzhiyun 
1184*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1185*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1186*4882a593Smuzhiyun 			continue;
1187*4882a593Smuzhiyun 
1188*4882a593Smuzhiyun 		cbe_sync_irq(cbe_cpu_to_node(cpu));
1189*4882a593Smuzhiyun 		/* Stop the counters */
1190*4882a593Smuzhiyun 		cbe_disable_pm(cpu);
1191*4882a593Smuzhiyun 		cbe_write_pm07_control(cpu, 0, 0);
1192*4882a593Smuzhiyun 
1193*4882a593Smuzhiyun 		/* Deactivate the signals */
1194*4882a593Smuzhiyun 		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1195*4882a593Smuzhiyun 
1196*4882a593Smuzhiyun 		/* Deactivate interrupts */
1197*4882a593Smuzhiyun 		cbe_disable_pm_interrupts(cpu);
1198*4882a593Smuzhiyun 	}
1199*4882a593Smuzhiyun 	del_timer_sync(&timer_spu_event_swap);
1200*4882a593Smuzhiyun }
1201*4882a593Smuzhiyun 
1202*4882a593Smuzhiyun static void cell_global_stop_ppu(void)
1203*4882a593Smuzhiyun {
1204*4882a593Smuzhiyun 	int cpu;
1205*4882a593Smuzhiyun 
1206*4882a593Smuzhiyun 	/*
1207*4882a593Smuzhiyun 	 * This routine will be called once for the system.
1208*4882a593Smuzhiyun 	 * There is one performance monitor per node, so we
1209*4882a593Smuzhiyun 	 * only need to perform this function once per node.
1210*4882a593Smuzhiyun 	 */
1211*4882a593Smuzhiyun 	del_timer_sync(&timer_virt_cntr);
1212*4882a593Smuzhiyun 	oprofile_running = 0;
1213*4882a593Smuzhiyun 	smp_wmb();
1214*4882a593Smuzhiyun 
1215*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1216*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1217*4882a593Smuzhiyun 			continue;
1218*4882a593Smuzhiyun 
1219*4882a593Smuzhiyun 		cbe_sync_irq(cbe_cpu_to_node(cpu));
1220*4882a593Smuzhiyun 		/* Stop the counters */
1221*4882a593Smuzhiyun 		cbe_disable_pm(cpu);
1222*4882a593Smuzhiyun 
1223*4882a593Smuzhiyun 		/* Deactivate the signals */
1224*4882a593Smuzhiyun 		pm_rtas_reset_signals(cbe_cpu_to_node(cpu));
1225*4882a593Smuzhiyun 
1226*4882a593Smuzhiyun 		/* Deactivate interrupts */
1227*4882a593Smuzhiyun 		cbe_disable_pm_interrupts(cpu);
1228*4882a593Smuzhiyun 	}
1229*4882a593Smuzhiyun }
1230*4882a593Smuzhiyun 
1231*4882a593Smuzhiyun static void cell_global_stop(void)
1232*4882a593Smuzhiyun {
1233*4882a593Smuzhiyun 	if (profiling_mode == PPU_PROFILING)
1234*4882a593Smuzhiyun 		cell_global_stop_ppu();
1235*4882a593Smuzhiyun 	else if (profiling_mode == SPU_PROFILING_EVENTS)
1236*4882a593Smuzhiyun 		cell_global_stop_spu_events();
1237*4882a593Smuzhiyun 	else
1238*4882a593Smuzhiyun 		cell_global_stop_spu_cycles();
1239*4882a593Smuzhiyun }
1240*4882a593Smuzhiyun 
1241*4882a593Smuzhiyun static int cell_global_start_spu_cycles(struct op_counter_config *ctr)
1242*4882a593Smuzhiyun {
1243*4882a593Smuzhiyun 	int subfunc;
1244*4882a593Smuzhiyun 	unsigned int lfsr_value;
1245*4882a593Smuzhiyun 	int cpu;
1246*4882a593Smuzhiyun 	int ret;
1247*4882a593Smuzhiyun 	int rtas_error;
1248*4882a593Smuzhiyun 	unsigned int cpu_khzfreq = 0;
1249*4882a593Smuzhiyun 
1250*4882a593Smuzhiyun 	/* The SPU profiling uses time-based profiling based on
1251*4882a593Smuzhiyun 	 * cpu frequency, so if configured with the CPU_FREQ
1252*4882a593Smuzhiyun 	 * option, we should detect frequency changes and react
1253*4882a593Smuzhiyun 	 * accordingly.
1254*4882a593Smuzhiyun 	 */
1255*4882a593Smuzhiyun #ifdef CONFIG_CPU_FREQ
1256*4882a593Smuzhiyun 	ret = cpufreq_register_notifier(&cpu_freq_notifier_block,
1257*4882a593Smuzhiyun 					CPUFREQ_TRANSITION_NOTIFIER);
1258*4882a593Smuzhiyun 	if (ret < 0)
1259*4882a593Smuzhiyun 		/* this is not a fatal error */
1260*4882a593Smuzhiyun 		printk(KERN_ERR "CPU freq change registration failed: %d\n",
1261*4882a593Smuzhiyun 		       ret);
1262*4882a593Smuzhiyun 
1263*4882a593Smuzhiyun 	else
1264*4882a593Smuzhiyun 		cpu_khzfreq = cpufreq_quick_get(smp_processor_id());
1265*4882a593Smuzhiyun #endif
1266*4882a593Smuzhiyun 
1267*4882a593Smuzhiyun 	set_spu_profiling_frequency(cpu_khzfreq, spu_cycle_reset);
1268*4882a593Smuzhiyun 
1269*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1270*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1271*4882a593Smuzhiyun 			continue;
1272*4882a593Smuzhiyun 
1273*4882a593Smuzhiyun 		/*
1274*4882a593Smuzhiyun 		 * Setup SPU cycle-based profiling.
1275*4882a593Smuzhiyun 		 * Set up SPU cycle-based profiling.
1276*4882a593Smuzhiyun 		 * enabling spu collection hardware.
1277*4882a593Smuzhiyun 		 */
1278*4882a593Smuzhiyun 		cbe_write_pm(cpu, pm_control, 0);
1279*4882a593Smuzhiyun 
1280*4882a593Smuzhiyun 		if (spu_cycle_reset > MAX_SPU_COUNT)
1281*4882a593Smuzhiyun 			/* use largest possible value */
1282*4882a593Smuzhiyun 			lfsr_value = calculate_lfsr(MAX_SPU_COUNT-1);
1283*4882a593Smuzhiyun 		else
1284*4882a593Smuzhiyun 			lfsr_value = calculate_lfsr(spu_cycle_reset);
1285*4882a593Smuzhiyun 
1286*4882a593Smuzhiyun 		/* must use a non-zero value; zero disables data collection. */
1287*4882a593Smuzhiyun 		if (lfsr_value == 0)
1288*4882a593Smuzhiyun 			lfsr_value = calculate_lfsr(1);
1289*4882a593Smuzhiyun 
1290*4882a593Smuzhiyun 		lfsr_value = lfsr_value << 8; /* shift lfsr to correct
1291*4882a593Smuzhiyun 						* register location
1292*4882a593Smuzhiyun 						*/
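		/*
		 * Illustrative example (values are made up): a 24-bit LFSR
		 * value of 0xABCDEF becomes 0xABCDEF00 after this shift,
		 * i.e. it occupies bits 8..31 of the word handed to the
		 * ibm,cbe-spu-perftools RTAS call below.
		 */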
1293*4882a593Smuzhiyun 
1294*4882a593Smuzhiyun 		/* debug bus setup */
1295*4882a593Smuzhiyun 		ret = pm_rtas_activate_spu_profiling(cbe_cpu_to_node(cpu));
1296*4882a593Smuzhiyun 
1297*4882a593Smuzhiyun 		if (unlikely(ret)) {
1298*4882a593Smuzhiyun 			rtas_error = ret;
1299*4882a593Smuzhiyun 			goto out;
1300*4882a593Smuzhiyun 		}
1301*4882a593Smuzhiyun 
1302*4882a593Smuzhiyun 
1303*4882a593Smuzhiyun 		subfunc = 2;	/* 2 - activate SPU tracing, 3 - deactivate */
1304*4882a593Smuzhiyun 
1305*4882a593Smuzhiyun 		/* start profiling */
1306*4882a593Smuzhiyun 		ret = rtas_call(spu_rtas_token, 3, 1, NULL, subfunc,
1307*4882a593Smuzhiyun 				cbe_cpu_to_node(cpu), lfsr_value);
1308*4882a593Smuzhiyun 
1309*4882a593Smuzhiyun 		if (unlikely(ret != 0)) {
1310*4882a593Smuzhiyun 			printk(KERN_ERR
1311*4882a593Smuzhiyun 			       "%s: rtas call ibm,cbe-spu-perftools failed, " \
1312*4882a593Smuzhiyun 			       "return = %d\n", __func__, ret);
1313*4882a593Smuzhiyun 			rtas_error = -EIO;
1314*4882a593Smuzhiyun 			goto out;
1315*4882a593Smuzhiyun 		}
1316*4882a593Smuzhiyun 	}
1317*4882a593Smuzhiyun 
1318*4882a593Smuzhiyun 	rtas_error = start_spu_profiling_cycles(spu_cycle_reset);
1319*4882a593Smuzhiyun 	if (rtas_error)
1320*4882a593Smuzhiyun 		goto out_stop;
1321*4882a593Smuzhiyun 
1322*4882a593Smuzhiyun 	oprofile_running = 1;
1323*4882a593Smuzhiyun 	return 0;
1324*4882a593Smuzhiyun 
1325*4882a593Smuzhiyun out_stop:
1326*4882a593Smuzhiyun 	cell_global_stop_spu_cycles();	/* clean up the PMU/debug bus */
1327*4882a593Smuzhiyun out:
1328*4882a593Smuzhiyun 	return rtas_error;
1329*4882a593Smuzhiyun }
1330*4882a593Smuzhiyun 
1331*4882a593Smuzhiyun static int cell_global_start_spu_events(struct op_counter_config *ctr)
1332*4882a593Smuzhiyun {
1333*4882a593Smuzhiyun 	int cpu;
1334*4882a593Smuzhiyun 	u32 interrupt_mask = 0;
1335*4882a593Smuzhiyun 	int rtn = 0;
1336*4882a593Smuzhiyun 
1337*4882a593Smuzhiyun 	hdw_thread = 0;
1338*4882a593Smuzhiyun 
1339*4882a593Smuzhiyun 	/* SPU event profiling uses the performance counters to generate
1340*4882a593Smuzhiyun 	 * an interrupt.  The hardware is set up to store the SPU program
1341*4882a593Smuzhiyun 	 * counter into the trace array.  The occurrence mode is used to
1342*4882a593Smuzhiyun 	 * enable storing data to the trace buffer.  The bits are set
1343*4882a593Smuzhiyun 	 * to send/store the SPU address in the trace buffer.  The debug
1344*4882a593Smuzhiyun 	 * bus must be set up to route the SPU program counter onto the
1345*4882a593Smuzhiyun 	 * debug bus.  The occurrence data in the trace buffer is not used.
1346*4882a593Smuzhiyun 	 */
1347*4882a593Smuzhiyun 
1348*4882a593Smuzhiyun 	/* This routine gets called once for the system.
1349*4882a593Smuzhiyun 	 * There is one performance monitor per node, so we
1350*4882a593Smuzhiyun 	 * only need to perform this function once per node.
1351*4882a593Smuzhiyun 	 */
1352*4882a593Smuzhiyun 
1353*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1354*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1355*4882a593Smuzhiyun 			continue;
1356*4882a593Smuzhiyun 
1357*4882a593Smuzhiyun 		/*
1358*4882a593Smuzhiyun 		 * Setup SPU event-based profiling.
1359*4882a593Smuzhiyun 		 * Set up SPU event-based profiling.
1360*4882a593Smuzhiyun 		 * enabling spu collection hardware.
1361*4882a593Smuzhiyun 		 *
1362*4882a593Smuzhiyun 		 * Only support one SPU event on one SPU per node.
1363*4882a593Smuzhiyun 		 */
1364*4882a593Smuzhiyun 		if (ctr_enabled & 1) {
1365*4882a593Smuzhiyun 			cbe_write_ctr(cpu, 0, reset_value[0]);
1366*4882a593Smuzhiyun 			enable_ctr(cpu, 0, pm_regs.pm07_cntrl);
1367*4882a593Smuzhiyun 			interrupt_mask |=
1368*4882a593Smuzhiyun 				CBE_PM_CTR_OVERFLOW_INTR(0);
1369*4882a593Smuzhiyun 		} else {
1370*4882a593Smuzhiyun 			/* Disable counter */
1371*4882a593Smuzhiyun 			cbe_write_pm07_control(cpu, 0, 0);
1372*4882a593Smuzhiyun 		}
1373*4882a593Smuzhiyun 
1374*4882a593Smuzhiyun 		cbe_get_and_clear_pm_interrupts(cpu);
1375*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
1376*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
1377*4882a593Smuzhiyun 
1378*4882a593Smuzhiyun 		/* clear the trace buffer */
1379*4882a593Smuzhiyun 		cbe_write_pm(cpu, trace_address, 0);
1380*4882a593Smuzhiyun 	}
1381*4882a593Smuzhiyun 
1382*4882a593Smuzhiyun 	/* Start the timer to time slice collecting the event profile
1383*4882a593Smuzhiyun 	 * on each of the SPUs.  Note, we can only collect the profile on
1384*4882a593Smuzhiyun 	 * one SPU per node at a time.
1385*4882a593Smuzhiyun 	 */
1386*4882a593Smuzhiyun 	start_spu_event_swap();
1387*4882a593Smuzhiyun 	start_spu_profiling_events();
1388*4882a593Smuzhiyun 	oprofile_running = 1;
1389*4882a593Smuzhiyun 	smp_wmb();
1390*4882a593Smuzhiyun 
1391*4882a593Smuzhiyun 	return rtn;
1392*4882a593Smuzhiyun }
1393*4882a593Smuzhiyun 
1394*4882a593Smuzhiyun static int cell_global_start_ppu(struct op_counter_config *ctr)
1395*4882a593Smuzhiyun {
1396*4882a593Smuzhiyun 	u32 cpu, i;
1397*4882a593Smuzhiyun 	u32 interrupt_mask = 0;
1398*4882a593Smuzhiyun 
1399*4882a593Smuzhiyun 	/* This routine gets called once for the system.
1400*4882a593Smuzhiyun 	 * There is one performance monitor per node, so we
1401*4882a593Smuzhiyun 	 * only need to perform this function once per node.
1402*4882a593Smuzhiyun 	 */
1403*4882a593Smuzhiyun 	for_each_online_cpu(cpu) {
1404*4882a593Smuzhiyun 		if (cbe_get_hw_thread_id(cpu))
1405*4882a593Smuzhiyun 			continue;
1406*4882a593Smuzhiyun 
1407*4882a593Smuzhiyun 		interrupt_mask = 0;
1408*4882a593Smuzhiyun 
1409*4882a593Smuzhiyun 		for (i = 0; i < num_counters; ++i) {
1410*4882a593Smuzhiyun 			if (ctr_enabled & (1 << i)) {
1411*4882a593Smuzhiyun 				cbe_write_ctr(cpu, i, reset_value[i]);
1412*4882a593Smuzhiyun 				enable_ctr(cpu, i, pm_regs.pm07_cntrl);
1413*4882a593Smuzhiyun 				interrupt_mask |= CBE_PM_CTR_OVERFLOW_INTR(i);
1414*4882a593Smuzhiyun 			} else {
1415*4882a593Smuzhiyun 				/* Disable counter */
1416*4882a593Smuzhiyun 				cbe_write_pm07_control(cpu, i, 0);
1417*4882a593Smuzhiyun 			}
1418*4882a593Smuzhiyun 		}
1419*4882a593Smuzhiyun 
1420*4882a593Smuzhiyun 		cbe_get_and_clear_pm_interrupts(cpu);
1421*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, hdw_thread, interrupt_mask);
1422*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
1423*4882a593Smuzhiyun 	}
1424*4882a593Smuzhiyun 
1425*4882a593Smuzhiyun 	virt_cntr_inter_mask = interrupt_mask;
1426*4882a593Smuzhiyun 	oprofile_running = 1;
1427*4882a593Smuzhiyun 	smp_wmb();
1428*4882a593Smuzhiyun 
1429*4882a593Smuzhiyun 	/*
1430*4882a593Smuzhiyun 	 * NOTE: start_virt_cntrs will result in cell_virtual_cntr() being
1431*4882a593Smuzhiyun 	 * executed which manipulates the PMU.	We start the "virtual counter"
1432*4882a593Smuzhiyun 	 * here so that we do not need to synchronize access to the PMU in
1433*4882a593Smuzhiyun 	 * the above for-loop.
1434*4882a593Smuzhiyun 	 */
1435*4882a593Smuzhiyun 	start_virt_cntrs();
1436*4882a593Smuzhiyun 
1437*4882a593Smuzhiyun 	return 0;
1438*4882a593Smuzhiyun }
1439*4882a593Smuzhiyun 
1440*4882a593Smuzhiyun static int cell_global_start(struct op_counter_config *ctr)
1441*4882a593Smuzhiyun {
1442*4882a593Smuzhiyun 	if (profiling_mode == SPU_PROFILING_CYCLES)
1443*4882a593Smuzhiyun 		return cell_global_start_spu_cycles(ctr);
1444*4882a593Smuzhiyun 	else if (profiling_mode == SPU_PROFILING_EVENTS)
1445*4882a593Smuzhiyun 		return cell_global_start_spu_events(ctr);
1446*4882a593Smuzhiyun 	else
1447*4882a593Smuzhiyun 		return cell_global_start_ppu(ctr);
1448*4882a593Smuzhiyun }
1449*4882a593Smuzhiyun 
1450*4882a593Smuzhiyun 
1451*4882a593Smuzhiyun /* The SPU interrupt handler
1452*4882a593Smuzhiyun  *
1453*4882a593Smuzhiyun  * SPU event profiling works as follows:
1454*4882a593Smuzhiyun  * The pm_signal[0] holds the one SPU event to be measured.  It is routed on
1455*4882a593Smuzhiyun  * the debug bus using word 0 or 1.  The value of pm_signal[1] and
1456*4882a593Smuzhiyun  * pm_signal[2] contain the necessary events to route the SPU program
1457*4882a593Smuzhiyun  * counter for the selected SPU onto the debug bus using words 2 and 3.
1458*4882a593Smuzhiyun  * The pm_interval register is setup to write the SPU PC value into the
1459*4882a593Smuzhiyun  * trace buffer at the maximum rate possible.  The trace buffer is configured
1460*4882a593Smuzhiyun  * to store the PCs, wrapping when it is full.  The performance counter is
1461*4882a593Smuzhiyun  * initialized to the max hardware count minus the number of events, N, between
1462*4882a593Smuzhiyun  * samples.  Once the N events have occurred, a HW counter overflow occurs
1463*4882a593Smuzhiyun  * causing the generation of a HW counter interrupt which also stops the
1464*4882a593Smuzhiyun  * writing of the SPU PC values to the trace buffer.  Hence the last PC
1465*4882a593Smuzhiyun  * written to the trace buffer is the SPU PC that we want.  Unfortunately,
1466*4882a593Smuzhiyun  * we have to read from the beginning of the trace buffer to get to the
1467*4882a593Smuzhiyun  * last value written.  We just hope the PPU has nothing better to do than
1468*4882a593Smuzhiyun  * service this interrupt.  The PC for the specific SPU being profiled is
1469*4882a593Smuzhiyun  * extracted from the trace buffer, processed and stored.  The trace buffer
1470*4882a593Smuzhiyun  * is cleared, interrupts are cleared, and the counter is reset to max - N.
1471*4882a593Smuzhiyun  * A kernel timer is used to periodically call the routine spu_evnt_swap()
1472*4882a593Smuzhiyun  * to switch to the next physical SPU in the node to profile in round robin
1473*4882a593Smuzhiyun  * order.  This way data is collected for all SPUs on the node.  It does mean
1474*4882a593Smuzhiyun  * that we need to use a relatively small value of N to ensure enough samples
1475*4882a593Smuzhiyun  * are collected on each SPU, since each SPU is only being profiled 1/8 of the
1476*4882a593Smuzhiyun  * time.  It may also be necessary to use a longer sample collection period.
1477*4882a593Smuzhiyun  */
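/*
 * Illustrative numbers only (not taken from the driver): if the user asks
 * for N = 100000 events between samples, the counter would be preloaded
 * with roughly max - N, e.g. 0xFFFFFFFF - 100000 = 0xFFFE795F for a 32-bit
 * counter, so that after 100000 occurrences of the event the counter
 * overflows, the trace buffer writes stop and the interrupt handled below
 * is raised.
 */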
1478*4882a593Smuzhiyun static void cell_handle_interrupt_spu(struct pt_regs *regs,
1479*4882a593Smuzhiyun 				      struct op_counter_config *ctr)
1480*4882a593Smuzhiyun {
1481*4882a593Smuzhiyun 	u32 cpu, cpu_tmp;
1482*4882a593Smuzhiyun 	u64 trace_entry;
1483*4882a593Smuzhiyun 	u32 interrupt_mask;
1484*4882a593Smuzhiyun 	u64 trace_buffer[2];
1485*4882a593Smuzhiyun 	u64 last_trace_buffer;
1486*4882a593Smuzhiyun 	u32 sample;
1487*4882a593Smuzhiyun 	u32 trace_addr;
1488*4882a593Smuzhiyun 	unsigned long sample_array_lock_flags;
1489*4882a593Smuzhiyun 	int spu_num;
1490*4882a593Smuzhiyun 	unsigned long flags;
1491*4882a593Smuzhiyun 
1492*4882a593Smuzhiyun 	/* Make sure the SPU event interrupt handler and the SPU event swap
1493*4882a593Smuzhiyun 	 * routine don't access the counters simultaneously.
1494*4882a593Smuzhiyun 	 */
1495*4882a593Smuzhiyun 	cpu = smp_processor_id();
1496*4882a593Smuzhiyun 	spin_lock_irqsave(&cntr_lock, flags);
1497*4882a593Smuzhiyun 
1498*4882a593Smuzhiyun 	cpu_tmp = cpu;
1499*4882a593Smuzhiyun 	cbe_disable_pm(cpu);
1500*4882a593Smuzhiyun 
1501*4882a593Smuzhiyun 	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
1502*4882a593Smuzhiyun 
1503*4882a593Smuzhiyun 	sample = 0xABCDEF;
1504*4882a593Smuzhiyun 	trace_entry = 0xfedcba;
1505*4882a593Smuzhiyun 	last_trace_buffer = 0xdeadbeaf;
1506*4882a593Smuzhiyun 
1507*4882a593Smuzhiyun 	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1508*4882a593Smuzhiyun 		/* disable writes to trace buff */
1509*4882a593Smuzhiyun 		cbe_write_pm(cpu, pm_interval, 0);
1510*4882a593Smuzhiyun 
1511*4882a593Smuzhiyun 		/* only have one perf cntr being used, cntr 0 */
1512*4882a593Smuzhiyun 		if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(0))
1513*4882a593Smuzhiyun 		    && ctr[0].enabled)
1514*4882a593Smuzhiyun 			/* The SPU PC values will be read
1515*4882a593Smuzhiyun 			 * from the trace buffer, reset counter
1516*4882a593Smuzhiyun 			 */
1517*4882a593Smuzhiyun 
1518*4882a593Smuzhiyun 			cbe_write_ctr(cpu, 0, reset_value[0]);
1519*4882a593Smuzhiyun 
1520*4882a593Smuzhiyun 		trace_addr = cbe_read_pm(cpu, trace_address);
1521*4882a593Smuzhiyun 
1522*4882a593Smuzhiyun 		while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) {
1523*4882a593Smuzhiyun 			/* There is data in the trace buffer to process.
1524*4882a593Smuzhiyun 			 * Read the buffer until you get to the last
1525*4882a593Smuzhiyun 			 * entry.  This is the value we want.
1526*4882a593Smuzhiyun 			 */
1527*4882a593Smuzhiyun 
1528*4882a593Smuzhiyun 			cbe_read_trace_buffer(cpu, trace_buffer);
1529*4882a593Smuzhiyun 			trace_addr = cbe_read_pm(cpu, trace_address);
1530*4882a593Smuzhiyun 		}
1531*4882a593Smuzhiyun 
1532*4882a593Smuzhiyun 		/* SPU Address 16 bit count format for 128 bit
1533*4882a593Smuzhiyun 		 * HW trace buffer is used for the SPU PC storage
1534*4882a593Smuzhiyun 		 *    HDR bits          0:15
1535*4882a593Smuzhiyun 		 *    SPU Addr 0 bits   16:31
1536*4882a593Smuzhiyun 		 *    SPU Addr 1 bits   32:47
1537*4882a593Smuzhiyun 		 *    unused bits       48:127
1538*4882a593Smuzhiyun 		 *
1539*4882a593Smuzhiyun 		 * HDR: bit4 = 1 SPU Address 0 valid
1540*4882a593Smuzhiyun 		 * HDR: bit5 = 1 SPU Address 1 valid
1541*4882a593Smuzhiyun 		 *  - unfortunately, the valid bits don't seem to work
1542*4882a593Smuzhiyun 		 *
1543*4882a593Smuzhiyun 		 * Note trace_buffer[0] holds bits 0:63 of the HW
1544*4882a593Smuzhiyun 		 * trace buffer, trace_buffer[1] holds bits 64:127
1545*4882a593Smuzhiyun 		 */
1546*4882a593Smuzhiyun 
1547*4882a593Smuzhiyun 		trace_entry = trace_buffer[0]
1548*4882a593Smuzhiyun 			& 0x00000000FFFF0000;
1549*4882a593Smuzhiyun 
1550*4882a593Smuzhiyun 		/* Only the top 16 bits of the 18-bit SPU PC address
1551*4882a593Smuzhiyun 		 * are stored in the trace buffer, hence shift right
1552*4882a593Smuzhiyun 		 * by 16 - 2 bits. */
1553*4882a593Smuzhiyun 		sample = trace_entry >> 14;
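		/*
		 * Worked example (illustrative only): if trace_buffer[0]
		 * were 0x0000000012340000, the mask above keeps the 16-bit
		 * field 0x1234 from bits 16..31, and shifting right by 14
		 * gives sample = 0x48D0, i.e. the stored PC field scaled
		 * back up by 4 to a local-store byte address.
		 */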
1554*4882a593Smuzhiyun 		last_trace_buffer = trace_buffer[0];
1555*4882a593Smuzhiyun 
1556*4882a593Smuzhiyun 		spu_num = spu_evnt_phys_spu_indx
1557*4882a593Smuzhiyun 			+ (cbe_cpu_to_node(cpu) * NUM_SPUS_PER_NODE);
1558*4882a593Smuzhiyun 
1559*4882a593Smuzhiyun 		/* make sure only one process at a time is calling
1560*4882a593Smuzhiyun 		 * spu_sync_buffer()
1561*4882a593Smuzhiyun 		 */
1562*4882a593Smuzhiyun 		spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
1563*4882a593Smuzhiyun 				  sample_array_lock_flags);
1564*4882a593Smuzhiyun 		spu_sync_buffer(spu_num, &sample, 1);
1565*4882a593Smuzhiyun 		spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
1566*4882a593Smuzhiyun 				       sample_array_lock_flags);
1567*4882a593Smuzhiyun 
1568*4882a593Smuzhiyun 		smp_wmb();    /* ensure SPU event buffer updates are written;
1569*4882a593Smuzhiyun 			       * we don't want events intermingled... */
1570*4882a593Smuzhiyun 
1571*4882a593Smuzhiyun 		/* The counters were frozen by the interrupt.
1572*4882a593Smuzhiyun 		 * Reenable the interrupt and restart the counters.
1573*4882a593Smuzhiyun 		 */
1574*4882a593Smuzhiyun 		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1575*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, hdw_thread,
1576*4882a593Smuzhiyun 					 virt_cntr_inter_mask);
1577*4882a593Smuzhiyun 
1578*4882a593Smuzhiyun 		/* clear the trace buffer, re-enable writes to trace buff */
1579*4882a593Smuzhiyun 		cbe_write_pm(cpu, trace_address, 0);
1580*4882a593Smuzhiyun 		cbe_write_pm(cpu, pm_interval, NUM_INTERVAL_CYC);
1581*4882a593Smuzhiyun 
1582*4882a593Smuzhiyun 		/* The writes to the various performance counters only writes
1583*4882a593Smuzhiyun 		 * to a latch.  The new values (interrupt setting bits, reset
1584*4882a593Smuzhiyun 		 * counter value etc.) are not copied to the actual registers
1585*4882a593Smuzhiyun 		 * until the performance monitor is enabled.  In order to get
1586*4882a593Smuzhiyun 		 * this to work as desired, the performance monitor needs to
1587*4882a593Smuzhiyun 		 * be disabled while writing to the latches.  This is a
1588*4882a593Smuzhiyun 		 * HW design issue.
1589*4882a593Smuzhiyun 		 */
1590*4882a593Smuzhiyun 		write_pm_cntrl(cpu);
1591*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
1592*4882a593Smuzhiyun 	}
1593*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cntr_lock, flags);
1594*4882a593Smuzhiyun }
1595*4882a593Smuzhiyun 
1596*4882a593Smuzhiyun static void cell_handle_interrupt_ppu(struct pt_regs *regs,
1597*4882a593Smuzhiyun 				      struct op_counter_config *ctr)
1598*4882a593Smuzhiyun {
1599*4882a593Smuzhiyun 	u32 cpu;
1600*4882a593Smuzhiyun 	u64 pc;
1601*4882a593Smuzhiyun 	int is_kernel;
1602*4882a593Smuzhiyun 	unsigned long flags = 0;
1603*4882a593Smuzhiyun 	u32 interrupt_mask;
1604*4882a593Smuzhiyun 	int i;
1605*4882a593Smuzhiyun 
1606*4882a593Smuzhiyun 	cpu = smp_processor_id();
1607*4882a593Smuzhiyun 
1608*4882a593Smuzhiyun 	/*
1609*4882a593Smuzhiyun 	 * Need to make sure the interrupt handler and the virt counter
1610*4882a593Smuzhiyun 	 * routine are not running at the same time. See the
1611*4882a593Smuzhiyun 	 * cell_virtual_cntr() routine for additional comments.
1612*4882a593Smuzhiyun 	 */
1613*4882a593Smuzhiyun 	spin_lock_irqsave(&cntr_lock, flags);
1614*4882a593Smuzhiyun 
1615*4882a593Smuzhiyun 	/*
1616*4882a593Smuzhiyun 	 * Need to disable and reenable the performance counters
1617*4882a593Smuzhiyun 	 * to get the desired behavior from the hardware.  This
1618*4882a593Smuzhiyun 	 * is hardware specific.
1619*4882a593Smuzhiyun 	 */
1620*4882a593Smuzhiyun 
1621*4882a593Smuzhiyun 	cbe_disable_pm(cpu);
1622*4882a593Smuzhiyun 
1623*4882a593Smuzhiyun 	interrupt_mask = cbe_get_and_clear_pm_interrupts(cpu);
1624*4882a593Smuzhiyun 
1625*4882a593Smuzhiyun 	/*
1626*4882a593Smuzhiyun 	 * If the interrupt mask has been cleared, then the virt cntr
1627*4882a593Smuzhiyun 	 * has cleared the interrupt.  When the thread that generated
1628*4882a593Smuzhiyun 	 * the interrupt is restored, the data count will be restored to
1629*4882a593Smuzhiyun 	 * 0xffffff0 to cause the interrupt to be regenerated.
1630*4882a593Smuzhiyun 	 */
1631*4882a593Smuzhiyun 
1632*4882a593Smuzhiyun 	if ((oprofile_running == 1) && (interrupt_mask != 0)) {
1633*4882a593Smuzhiyun 		pc = regs->nip;
1634*4882a593Smuzhiyun 		is_kernel = is_kernel_addr(pc);
1635*4882a593Smuzhiyun 
1636*4882a593Smuzhiyun 		for (i = 0; i < num_counters; ++i) {
1637*4882a593Smuzhiyun 			if ((interrupt_mask & CBE_PM_CTR_OVERFLOW_INTR(i))
1638*4882a593Smuzhiyun 			    && ctr[i].enabled) {
1639*4882a593Smuzhiyun 				oprofile_add_ext_sample(pc, regs, i, is_kernel);
1640*4882a593Smuzhiyun 				cbe_write_ctr(cpu, i, reset_value[i]);
1641*4882a593Smuzhiyun 			}
1642*4882a593Smuzhiyun 		}
1643*4882a593Smuzhiyun 
1644*4882a593Smuzhiyun 		/*
1645*4882a593Smuzhiyun 		 * The counters were frozen by the interrupt.
1646*4882a593Smuzhiyun 		 * Reenable the interrupt and restart the counters.
1647*4882a593Smuzhiyun 		 * If there was a race between the interrupt handler and
1648*4882a593Smuzhiyun 		 * the virtual counter routine, the virtual counter routine
1649*4882a593Smuzhiyun 		 * may have cleared the interrupts.  Hence we must use the
1650*4882a593Smuzhiyun 		 * virt_cntr_inter_mask to re-enable the interrupts.
1651*4882a593Smuzhiyun 		 */
1652*4882a593Smuzhiyun 		cbe_enable_pm_interrupts(cpu, hdw_thread,
1653*4882a593Smuzhiyun 					 virt_cntr_inter_mask);
1654*4882a593Smuzhiyun 
1655*4882a593Smuzhiyun 		/*
1656*4882a593Smuzhiyun 		 * The writes to the various performance counters only writes
1657*4882a593Smuzhiyun 		 * to a latch.	The new values (interrupt setting bits, reset
1658*4882a593Smuzhiyun 		 * counter value etc.) are not copied to the actual registers
1659*4882a593Smuzhiyun 		 * until the performance monitor is enabled.  In order to get
1660*4882a593Smuzhiyun 		 * this to work as desired, the performance monitor needs to
1661*4882a593Smuzhiyun 		 * be disabled while writing to the latches.  This is a
1662*4882a593Smuzhiyun 		 * HW design issue.
1663*4882a593Smuzhiyun 		 */
1664*4882a593Smuzhiyun 		cbe_enable_pm(cpu);
1665*4882a593Smuzhiyun 	}
1666*4882a593Smuzhiyun 	spin_unlock_irqrestore(&cntr_lock, flags);
1667*4882a593Smuzhiyun }
1668*4882a593Smuzhiyun 
1669*4882a593Smuzhiyun static void cell_handle_interrupt(struct pt_regs *regs,
1670*4882a593Smuzhiyun 				  struct op_counter_config *ctr)
1671*4882a593Smuzhiyun {
1672*4882a593Smuzhiyun 	if (profiling_mode == PPU_PROFILING)
1673*4882a593Smuzhiyun 		cell_handle_interrupt_ppu(regs, ctr);
1674*4882a593Smuzhiyun 	else
1675*4882a593Smuzhiyun 		cell_handle_interrupt_spu(regs, ctr);
1676*4882a593Smuzhiyun }
1677*4882a593Smuzhiyun 
1678*4882a593Smuzhiyun /*
1679*4882a593Smuzhiyun  * This function is called from the generic OProfile
1680*4882a593Smuzhiyun  * driver.  When profiling PPUs, we need to do the
1681*4882a593Smuzhiyun  * generic sync start; otherwise, do spu_sync_start.
1682*4882a593Smuzhiyun  */
1683*4882a593Smuzhiyun static int cell_sync_start(void)
1684*4882a593Smuzhiyun {
1685*4882a593Smuzhiyun 	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
1686*4882a593Smuzhiyun 	    (profiling_mode == SPU_PROFILING_EVENTS))
1687*4882a593Smuzhiyun 		return spu_sync_start();
1688*4882a593Smuzhiyun 	else
1689*4882a593Smuzhiyun 		return DO_GENERIC_SYNC;
1690*4882a593Smuzhiyun }
1691*4882a593Smuzhiyun 
1692*4882a593Smuzhiyun static int cell_sync_stop(void)
1693*4882a593Smuzhiyun {
1694*4882a593Smuzhiyun 	if ((profiling_mode == SPU_PROFILING_CYCLES) ||
1695*4882a593Smuzhiyun 	    (profiling_mode == SPU_PROFILING_EVENTS))
1696*4882a593Smuzhiyun 		return spu_sync_stop();
1697*4882a593Smuzhiyun 	else
1698*4882a593Smuzhiyun 		return 1;
1699*4882a593Smuzhiyun }
1700*4882a593Smuzhiyun 
1701*4882a593Smuzhiyun struct op_powerpc_model op_model_cell = {
1702*4882a593Smuzhiyun 	.reg_setup = cell_reg_setup,
1703*4882a593Smuzhiyun 	.cpu_setup = cell_cpu_setup,
1704*4882a593Smuzhiyun 	.global_start = cell_global_start,
1705*4882a593Smuzhiyun 	.global_stop = cell_global_stop,
1706*4882a593Smuzhiyun 	.sync_start = cell_sync_start,
1707*4882a593Smuzhiyun 	.sync_stop = cell_sync_stop,
1708*4882a593Smuzhiyun 	.handle_interrupt = cell_handle_interrupt,
1709*4882a593Smuzhiyun };