/* xref: /utopia/UTPA2-700.0.x/projects/tools/lint/aeon_include/machine/spr_defs.h (revision 53ee8cc121a030b8d368113ac3e966b4705770ef) */
1*53ee8cc1Swenshuai.xi #ifndef __SPR_DEFS_H
2*53ee8cc1Swenshuai.xi #define __SPR_DEFS_H
3*53ee8cc1Swenshuai.xi 
4*53ee8cc1Swenshuai.xi #if 1 //def __aeon__
/*
 * SPR address-space geometry.
 *
 * An SPR address is composed of a group selector in the upper bits and a
 * register offset in the low MAX_SPRS_PER_GRP_BITS bits.
 * MAX_GRPS * MAX_SPRS_PER_GRP equals MAX_SPRS.
 */
#define MAX_GRPS                (32)
#define MAX_SPRS_PER_GRP_BITS   (11)
#define MAX_SPRS_PER_GRP        (1 << MAX_SPRS_PER_GRP_BITS)
#define MAX_SPRS                (0x10000)

/* Offset of `spr` inside its group (group bits masked off). */
#define SPR_GRP_OFF(spr)        ((spr) & (MAX_SPRS_PER_GRP - 1))
/* Base address of the group that contains `spr` (offset bits masked off). */
#define SPR_GRP(spr)            ((spr) & (-MAX_SPRS_PER_GRP))

/*
 * Base addresses for the groups.
 * Each base is the group index shifted left by MAX_SPRS_PER_GRP_BITS.
 */
#define SPRGROUP_SYS    (0 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_DMMU   (1 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_IMMU   (2 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_DC     (3 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_IC     (4 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_MAC    (5 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_D      (6 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_PC     (7 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_PM     (8 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_PIC    (9 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_TT     (10 << MAX_SPRS_PER_GRP_BITS)
#define SPRGROUP_ACCEL  (11 << MAX_SPRS_PER_GRP_BITS)

/* Alternate spellings of four of the group bases above. */
#define SPR_GROUP_SYS   SPRGROUP_SYS
#define SPR_GROUP_PM    SPRGROUP_PM
#define SPR_GROUP_PIC   SPRGROUP_PIC
#define SPR_GROUP_TT    SPRGROUP_TT

/*
 * System control and status group: full SPR addresses.
 *
 * Note: SPR_NPC and SPR_DRETURN are both defined as offset 16, i.e. they
 * name the same address.
 */
#define SPR_VR          (SPRGROUP_SYS + 0)
#define SPR_UPR         (SPRGROUP_SYS + 1)
#define SPR_CPUCFGR     (SPRGROUP_SYS + 2)
#define SPR_DMMUCFGR    (SPRGROUP_SYS + 3)
#define SPR_IMMUCFGR    (SPRGROUP_SYS + 4)
#define SPR_DCCFGR      (SPRGROUP_SYS + 5)
#define SPR_ICCFGR      (SPRGROUP_SYS + 6)
#define SPR_DCFGR       (SPRGROUP_SYS + 7)
#define SPR_PCCFGR      (SPRGROUP_SYS + 8)
#define SPR_NPC         (SPRGROUP_SYS + 16)  /* CZ 21/06/01 */
#define SPR_DRETURN     (SPRGROUP_SYS + 16)  /* CZ 21/06/01 */
#define SPR_SR          (SPRGROUP_SYS + 17)  /* CZ 21/06/01 */
#define SPR_PPC         (SPRGROUP_SYS + 18)  /* CZ 21/06/01 */
#define SPR_EPCR_BASE   (SPRGROUP_SYS + 32)  /* CZ 21/06/01 */
#define SPR_EPCR_LAST   (SPRGROUP_SYS + 47)  /* CZ 21/06/01 */
#define SPR_EEAR_BASE   (SPRGROUP_SYS + 48)
#define SPR_EEAR_LAST   (SPRGROUP_SYS + 63)
#define SPR_ESR_BASE    (SPRGROUP_SYS + 64)
#define SPR_ESR_LAST    (SPRGROUP_SYS + 79)
/* Address of entry `x` in the block starting at offset 0x400. */
#define SPR_GPR(x)      (SPRGROUP_SYS + 0x400 + (x))

/* Group-relative offsets (group bits stripped) of selected registers. */
#define SPR_SYS_SR          SPR_GRP_OFF(SPR_SR)
#define SPR_SYS_ESR_BASE    SPR_GRP_OFF(SPR_ESR_BASE)
#define SPR_SYS_ESR_LAST    SPR_GRP_OFF(SPR_ESR_LAST)

/*
 * Data MMU group.
 * Per-way TLB windows start at offset 0x200; each way occupies 0x100
 * addresses: match registers in the first 0x80, translate registers in the
 * second 0x80.
 */
#define SPR_DMMUCR              (SPRGROUP_DMMU + 0)
#define SPR_DTLBMR_BASE(WAY)    (SPRGROUP_DMMU + 0x200 + (WAY) * 0x100)
#define SPR_DTLBMR_LAST(WAY)    (SPRGROUP_DMMU + 0x27f + (WAY) * 0x100)
#define SPR_DTLBTR_BASE(WAY)    (SPRGROUP_DMMU + 0x280 + (WAY) * 0x100)
#define SPR_DTLBTR_LAST(WAY)    (SPRGROUP_DMMU + 0x2ff + (WAY) * 0x100)

/*
 * Instruction MMU group.
 * Per-way TLB windows start at offset 0x200; each way occupies 0x100
 * addresses: match registers in the first 0x80, translate registers in the
 * second 0x80.
 */
#define SPR_IMMUCR              (SPRGROUP_IMMU + 0)
#define SPR_ITLBMR_BASE(WAY)    (SPRGROUP_IMMU + 0x200 + (WAY) * 0x100)
#define SPR_ITLBMR_LAST(WAY)    (SPRGROUP_IMMU + 0x27f + (WAY) * 0x100)
#define SPR_ITLBTR_BASE(WAY)    (SPRGROUP_IMMU + 0x280 + (WAY) * 0x100)
#define SPR_ITLBTR_LAST(WAY)    (SPRGROUP_IMMU + 0x2ff + (WAY) * 0x100)

/*
 * Data cache group.
 * Per-way windows start at offset 0x200 and are 0x200 addresses each.
 */
#define SPR_DCCR            (SPRGROUP_DC + 0)
#define SPR_DCBPR           (SPRGROUP_DC + 1)
#define SPR_DCBFR           (SPRGROUP_DC + 2)
#define SPR_DCBIR           (SPRGROUP_DC + 3)
#define SPR_DCBWR           (SPRGROUP_DC + 4)
#define SPR_DCBLR           (SPRGROUP_DC + 5)
#define SPR_DCR_BASE(WAY)   (SPRGROUP_DC + 0x200 + (WAY) * 0x200)
#define SPR_DCR_LAST(WAY)   (SPRGROUP_DC + 0x3ff + (WAY) * 0x200)

/*
 * Instruction cache group.
 * Per-way windows start at offset 0x200 and are 0x200 addresses each.
 */
#define SPR_ICCR            (SPRGROUP_IC + 0)
#define SPR_ICBPR           (SPRGROUP_IC + 1)
#define SPR_ICBIR           (SPRGROUP_IC + 2)
#define SPR_ICBLR           (SPRGROUP_IC + 3)
#define SPR_ICR_BASE(WAY)   (SPRGROUP_IC + 0x200 + (WAY) * 0x200)
#define SPR_ICR_LAST(WAY)   (SPRGROUP_IC + 0x3ff + (WAY) * 0x200)

/* MAC group: full SPR addresses. */
#define SPR_MACLO       (SPRGROUP_MAC + 1)
#define SPR_MACHI       (SPRGROUP_MAC + 2)
#define SPR_MACHI2      (SPRGROUP_MAC + 3)

/* AMAC accumulator, configuration and status registers. */
#define SPR_AMULHI      (SPRGROUP_MAC + 8)
#define SPR_AMACLO0     (SPRGROUP_MAC + 9)
#define SPR_AMACHI0     (SPRGROUP_MAC + 10)
#define SPR_AMACLO1     (SPRGROUP_MAC + 11)
#define SPR_AMACHI1     (SPRGROUP_MAC + 12)
#define SPR_AMACCFG     (SPRGROUP_MAC + 13)
#define SPR_AMACSTATUS  (SPRGROUP_MAC + 14)

/* Guard registers (offsets 5 and 6, below the AMAC block above). */
#define SPR_AMACGUARD0  (SPRGROUP_MAC + 5)
#define SPR_AMACGUARD1  (SPRGROUP_MAC + 6)

/* Queue 0 registers (offsets 16..20). */
#define SPR_AMACQ0                  (SPRGROUP_MAC + 16)
#define SPR_AMACQ0_ADDROFS          (SPRGROUP_MAC + 17)
#define SPR_AMACQ0_STRIDE           (SPRGROUP_MAC + 18)
#define SPR_AMACQ0_STRIDE2          (SPRGROUP_MAC + 19)
#define SPR_AMACQ0_ADDROFS_STRIDE   (SPRGROUP_MAC + 20)

/* Queue 1 registers (offsets 24..28, same order as queue 0). */
#define SPR_AMACQ1                  (SPRGROUP_MAC + 24)
#define SPR_AMACQ1_ADDROFS          (SPRGROUP_MAC + 25)
#define SPR_AMACQ1_STRIDE           (SPRGROUP_MAC + 26)
#define SPR_AMACQ1_STRIDE2          (SPRGROUP_MAC + 27)
#define SPR_AMACQ1_ADDROFS_STRIDE   (SPRGROUP_MAC + 28)

/* Width in bits of each of the two stride fields in a queue address word. */
#define AMACQSTRIDE_BITS	8

/*
 * Pack a queue address/stride word.
 *
 * `stride` goes in the low AMACQSTRIDE_BITS bits, `stride2` in the next
 * AMACQSTRIDE_BITS bits (both truncated to that width), and `addr_offset`
 * is shifted above both.  Arguments are converted to unsigned before use
 * and each is evaluated exactly once.
 */
#define MK_QADDR_STRIDE(addr_offset, stride, stride2)    \
    ( ((unsigned)(addr_offset) << (AMACQSTRIDE_BITS + AMACQSTRIDE_BITS)) |  \
      (((unsigned)(stride2) & ((1 << AMACQSTRIDE_BITS) - 1)) << AMACQSTRIDE_BITS) | \
      ((unsigned)(stride) & ((1 << AMACQSTRIDE_BITS) - 1)) )

/*
 * Queue address/stride word, viewable either as bit-fields or as the raw
 * value `v`.  The field declaration order is reversed when both
 * HAS_EXECUTION and WORDS_BIGENDIAN are defined.
 */
typedef struct
{
	union
	{
		struct
		{
#if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
			unsigned addr_offset : 32 - 2 * AMACQSTRIDE_BITS;
			int stride2 : AMACQSTRIDE_BITS;	/* need to multiply by sizeof(item) */
			int stride : AMACQSTRIDE_BITS;	/* need to multiply by sizeof(item) */
#else
			int stride : AMACQSTRIDE_BITS;
			int stride2 : AMACQSTRIDE_BITS;
			unsigned addr_offset : 32 - 2 * AMACQSTRIDE_BITS;
#endif
		};
		unsigned v;	/* whole word */
	};
} qaddr_stride_t;

/* Rounding-mode selector values (fit the 1-bit *_round_mode fields). */
typedef enum
{
    amac_round_to_infinity = 0,
    amac_round_away_from_zero = 1
} amac_round_mode_e;

/*
 * Saturation-mode selector values (fit the 2-bit *_saturate_mode fields).
 * amac_saturate_dont_care and amac_saturate_to_64bit_before_shift share
 * the value 0.
 */
typedef enum
{
    amac_saturate_dont_care = 0,
    amac_saturate_to_64bit_before_shift = 0,
    amac_saturate_to_64bit_after_shift = 1,
    amac_saturate_to_32bit_after_shift = 2,
    amac_saturate_to_16bit_after_shift = 3
} amac_saturate_mode_e;

/*
 * AMAC configuration word, viewable either as bit-fields or as the raw
 * value `v`.  The bit-fields total 32 bits; their declaration order is
 * reversed when both HAS_EXECUTION and WORDS_BIGENDIAN are defined.
 */
typedef struct
{
	union
	{
		struct
		{
#if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
			unsigned __resv4 : 13;
			unsigned a1_round_mode : 1;
			unsigned a1_saturate_mode : 2;
			unsigned __resv3 : 5;
			unsigned a0_round_mode : 1;
			unsigned a0_saturate_mode : 2;
			unsigned __resv2 : 5;
			unsigned fmode : 1;
			unsigned __resv1 : 1;
			unsigned enabled : 1;
#else
			unsigned enabled : 1;
			unsigned __resv1 : 1;
			unsigned fmode : 1;
			unsigned __resv2 : 5;
			unsigned a0_saturate_mode : 2;
			unsigned a0_round_mode : 1;
			unsigned __resv3 : 5;
			unsigned a1_saturate_mode : 2;
			unsigned a1_round_mode : 1;
			unsigned __resv4 : 13;
#endif
		};
		unsigned v;	/* whole word */
	};
} amac_cfg_t;

/*
 * AMAC status word, viewable either as bit-fields or as the raw value `v`.
 * It carries a single 1-bit `ov` flag plus 31 reserved bits; the
 * declaration order is reversed when both HAS_EXECUTION and
 * WORDS_BIGENDIAN are defined.
 */
typedef struct
{
	union
	{
		struct
		{
#if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
			unsigned __resv1 : 31;
			unsigned ov : 1;
#else
			unsigned ov : 1;
			unsigned __resv1 : 31;
#endif
		};
		unsigned v;	/* whole word */
	};
} amac_status_t;

/*
 * Debug group.
 * SPR_DVR(N) occupies offsets starting at 0 and SPR_DCR(N) offsets
 * starting at 8; the fixed registers follow from offset 16.
 */
#define SPR_DVR(N)      (SPRGROUP_D + (N))
#define SPR_DCR(N)      (SPRGROUP_D + 8 + (N))
#define SPR_DMR1        (SPRGROUP_D + 16)
#define SPR_DMR2        (SPRGROUP_D + 17)
#define SPR_DWCR0       (SPRGROUP_D + 18)
#define SPR_DWCR1       (SPRGROUP_D + 19)
#define SPR_DSR         (SPRGROUP_D + 20)
#define SPR_DRR         (SPRGROUP_D + 21)
#define SPR_DREASON     (SPRGROUP_D + 22)

/*
 * Performance counters group.
 * SPR_PCCR(N) starts at offset 0 and SPR_PCMR(N) at offset 8.
 */
#define SPR_PCCR(N)     (SPRGROUP_PC + (N))
#define SPR_PCMR(N)     (SPRGROUP_PC + 8 + (N))

/* Power management group. */
#define SPR_PMR     (SPRGROUP_PM + 0)

/* Group-relative offset of SPR_PMR (group bits stripped). */
#define SPR_PM_PMR  SPR_GRP_OFF(SPR_PMR)

/* PIC group: full SPR addresses. */
#define SPR_PICMR   (SPRGROUP_PIC + 0)
#define SPR_PICPR   (SPRGROUP_PIC + 1)
#define SPR_PICSR   (SPRGROUP_PIC + 2)

/* Tick Timer group: full SPR addresses. */
#define SPR_TTMR    (SPRGROUP_TT + 0)
#define SPR_TTCR    (SPRGROUP_TT + 1)
#define SPR_TTUR    (SPRGROUP_TT + 31)

/* Group-relative offsets (group bits stripped). */
#define SPR_TT_TTMR     SPR_GRP_OFF(SPR_TTMR)
#define SPR_TT_TTCR     SPR_GRP_OFF(SPR_TTCR)

/* Accelerator group: full SPR addresses. */
#define SPR_ACCEL_FFT               (SPRGROUP_ACCEL + 0)
#define SPR_ACCEL_HW_LOOP_COUNTER0  (SPRGROUP_ACCEL + 8)
#define SPR_ACCEL_HW_LOOP_COUNTER1  (SPRGROUP_ACCEL + 9)
#define SPR_ACCEL_HW_LOOP_COUNTER2  (SPRGROUP_ACCEL + 10)
#define SPR_ACCEL_HW_LOOP_COUNTER3  (SPRGROUP_ACCEL + 11)

/* FFT addressing-mode selector values. */
#define FFT_ADDR_MODE_1_1       0
#define FFT_ADDR_MODE_2_2       1

/*
 * FFT status word, viewable either as bit-fields or as the raw value `v`.
 * It carries a single 1-bit `busy` flag plus 31 reserved bits.
 *
 * NOTE(review): unlike the AMAC bit-field types in this header, this one
 * has no HAS_EXECUTION/WORDS_BIGENDIAN variant -- confirm that is intended.
 */
typedef struct
{
	union
	{
		struct
		{
			unsigned busy : 1;
			unsigned __resv1 : 31;
		};
		unsigned v;	/* whole word */
	};
} fft_status_t;

/*
 * Bit definitions for the Version Register
 *
 * NOTE(review): SPR_VR_VER (bits 31:16) fully contains SPR_VR_CFG
 * (bits 23:16) -- confirm against the core's VR layout before relying on
 * either mask in isolation.
 */
#define SPR_VR_VER  0xffff0000  /* Processor version */
#define SPR_VR_CFG  0x00ff0000  /* Processor configuration */
#define SPR_VR_REV  0x0000003f  /* Processor revision */

/*
 * Bit definitions for the Unit Present Register
 *
 * One flag per unit in bits 0..10, plus an 8-bit custom field in bits 31:24.
 */
#define SPR_UPR_UP      0x00000001  /* UPR present */
#define SPR_UPR_DCP     0x00000002  /* Data cache present */
#define SPR_UPR_ICP     0x00000004  /* Instruction cache present */
#define SPR_UPR_DMP     0x00000008  /* Data MMU present */
#define SPR_UPR_IMP     0x00000010  /* Instruction MMU present */
#define SPR_UPR_MP      0x00000020  /* MAC Present */
#define SPR_UPR_DUP     0x00000040  /* Debug unit present */
#define SPR_UPR_PCUP    0x00000080  /* Performance counters unit present */
#define SPR_UPR_PMP     0x00000100  /* Power management present */
#define SPR_UPR_PICP    0x00000200  /* PIC present */
#define SPR_UPR_TTP     0x00000400  /* Tick timer present */
#define SPR_UPR_CUST    0xff000000  /* Custom units */

/*
 * Bit definitions for the Supervision Register
 *
 * Single-bit flags occupy bits 0..16; the context ID is the 4-bit field in
 * bits 31:28.
 */
#define SPR_SR_CID      0xf0000000  /* Context ID */
#define SPR_SR_SUMRA    0x00010000  /* Supervisor SPR read access */
#define SPR_SR_FO       0x00008000  /* Fixed one */
#define SPR_SR_EPH      0x00004000  /* Exception Prefix High */
#define SPR_SR_DSX      0x00002000  /* Delay Slot Exception */
#define SPR_SR_OVE      0x00001000  /* Overflow flag Exception */
#define SPR_SR_OV       0x00000800  /* Overflow flag */
#define SPR_SR_CY       0x00000400  /* Carry flag */
#define SPR_SR_F        0x00000200  /* Condition Flag */
#define SPR_SR_CE       0x00000100  /* CID Enable */
#define SPR_SR_LEE      0x00000080  /* Little Endian Enable */
#define SPR_SR_IME      0x00000040  /* Instruction MMU Enable */
#define SPR_SR_DME      0x00000020  /* Data MMU Enable */
#define SPR_SR_ICE      0x00000010  /* Instruction Cache Enable */
#define SPR_SR_DCE      0x00000008  /* Data Cache Enable */
#define SPR_SR_IEE      0x00000004  /* Interrupt Exception Enable */
#define SPR_SR_TEE      0x00000002  /* Tick timer Exception Enable */
#define SPR_SR_SM       0x00000001  /* Supervisor Mode */

/*
 * Bit definitions for the Data MMU Control Register
 *
 * The four fields are contiguous and non-overlapping, covering bits 19:1.
 */
#define SPR_DMMUCR_P2S          0x0000003e  /* Level 2 Page Size */
#define SPR_DMMUCR_P1S          0x000007c0  /* Level 1 Page Size */
#define SPR_DMMUCR_VADDR_WIDTH  0x0000f800  /* Virtual ADDR Width */
#define SPR_DMMUCR_PADDR_WIDTH  0x000f0000  /* Physical ADDR Width */

/*
 * Bit definitions for the Instruction MMU Control Register
 *
 * The four fields are contiguous and non-overlapping, covering bits 19:1.
 */
#define SPR_IMMUCR_P2S          0x0000003e  /* Level 2 Page Size */
#define SPR_IMMUCR_P1S          0x000007c0  /* Level 1 Page Size */
#define SPR_IMMUCR_VADDR_WIDTH  0x0000f800  /* Virtual ADDR Width */
#define SPR_IMMUCR_PADDR_WIDTH  0x000f0000  /* Physical ADDR Width */

/*
 * Bit definitions for the Data TLB Match Register
 *
 * Flags and small fields sit in bits 7:0; the page number is bits 31:12.
 */
#define SPR_DTLBMR_V    0x00000001  /* Valid */
#define SPR_DTLBMR_PL1  0x00000002  /* Page Level 1 (if 0 then PL2) */
#define SPR_DTLBMR_CID  0x0000003c  /* Context ID */
#define SPR_DTLBMR_LRU  0x000000c0  /* Least Recently Used */
#define SPR_DTLBMR_VPN  0xfffff000  /* Virtual Page Number */

/*
 * Bit definitions for the Data TLB Translate Register
 *
 * Ten single-bit attributes in bits 9:0; the page number is bits 31:12.
 */
#define SPR_DTLBTR_CC   0x00000001  /* Cache Coherency */
#define SPR_DTLBTR_CI   0x00000002  /* Cache Inhibit */
#define SPR_DTLBTR_WBC  0x00000004  /* Write-Back Cache */
#define SPR_DTLBTR_WOM  0x00000008  /* Weakly-Ordered Memory */
#define SPR_DTLBTR_A    0x00000010  /* Accessed */
#define SPR_DTLBTR_D    0x00000020  /* Dirty */
#define SPR_DTLBTR_URE  0x00000040  /* User Read Enable */
#define SPR_DTLBTR_UWE  0x00000080  /* User Write Enable */
#define SPR_DTLBTR_SRE  0x00000100  /* Supervisor Read Enable */
#define SPR_DTLBTR_SWE  0x00000200  /* Supervisor Write Enable */
#define SPR_DTLBTR_PPN  0xfffff000  /* Physical Page Number */

/*
 * Bit definitions for the Instruction TLB Match Register
 *
 * Flags and small fields sit in bits 7:0; the page number is bits 31:12.
 */
#define SPR_ITLBMR_V    0x00000001  /* Valid */
#define SPR_ITLBMR_PL1  0x00000002  /* Page Level 1 (if 0 then PL2) */
#define SPR_ITLBMR_CID  0x0000003c  /* Context ID */
#define SPR_ITLBMR_LRU  0x000000c0  /* Least Recently Used */
#define SPR_ITLBMR_VPN  0xfffff000  /* Virtual Page Number */

/*
 * Bit definitions for the Instruction TLB Translate Register
 *
 * Eight single-bit attributes in bits 7:0; the page number is bits 31:12.
 * Bits 6 and 7 are execute permissions here (SXE/UXE), not the read/write
 * permissions the data-side register has at the same positions.
 */
#define SPR_ITLBTR_CC   0x00000001  /* Cache Coherency */
#define SPR_ITLBTR_CI   0x00000002  /* Cache Inhibit */
#define SPR_ITLBTR_WBC  0x00000004  /* Write-Back Cache */
#define SPR_ITLBTR_WOM  0x00000008  /* Weakly-Ordered Memory */
#define SPR_ITLBTR_A    0x00000010  /* Accessed */
#define SPR_ITLBTR_D    0x00000020  /* Dirty */
#define SPR_ITLBTR_SXE  0x00000040  /* Supervisor Execute Enable */
#define SPR_ITLBTR_UXE  0x00000080  /* User Execute Enable */
#define SPR_ITLBTR_PPN  0xfffff000  /* Physical Page Number */

/*
 * Bit definitions for Data Cache Control register
 */
#define SPR_DCCR_EW     0x000000ff  /* Enable ways (bits 7:0) */

/*
 * Bit definitions for Insn Cache Control register
 */
#define SPR_ICCR_EW     0x000000ff  /* Enable ways (bits 7:0) */

/*
 * Bit definitions for Debug Configuration register (DCFGR)
 *
 * The active layout is a 3-bit NDP field with WPCI at bit 3.  The disabled
 * #else branch keeps an alternative layout (4-bit NDP, WPCI at bit 4).
 */
#if 1
#define SPR_DCFGR_NDP   0x00000007  /* bits 2:0 */
#define SPR_DCFGR_WPCI  0x00000008  /* bit 3 */
#else
#define SPR_DCFGR_NDP   0x0000000f  /* bits 3:0 */
#define SPR_DCFGR_WPCI  0x00000010  /* bit 4 */
#endif


/*
 * Bit definitions for Data Cache Configuration Register
 *
 * Two multi-bit fields (NCW, NCS) followed by single-bit flags up to bit 14.
 */

#define SPR_DCCFGR_NCW      0x00000007  /* bits 2:0 */
#define SPR_DCCFGR_NCS      0x00000078  /* bits 6:3 */
#define SPR_DCCFGR_CBS      0x00000080  /* bit 7 */
#define SPR_DCCFGR_CWS      0x00000100  /* bit 8 */
#define SPR_DCCFGR_CCRI     0x00000200  /* bit 9 */
#define SPR_DCCFGR_CBIRI    0x00000400  /* bit 10 */
#define SPR_DCCFGR_CBPRI    0x00000800  /* bit 11 */
#define SPR_DCCFGR_CBLRI    0x00001000  /* bit 12 */
#define SPR_DCCFGR_CBFRI    0x00002000  /* bit 13 */
#define SPR_DCCFGR_CBWBRI   0x00004000  /* bit 14 */

/*
 * Bit definitions for Instruction Cache Configuration Register
 *
 * Same positions as the data-cache configuration masks; no mask is defined
 * here for bit 8 or for bits 13 and 14.
 */
#define SPR_ICCFGR_NCW      0x00000007  /* bits 2:0 */
#define SPR_ICCFGR_NCS      0x00000078  /* bits 6:3 */
#define SPR_ICCFGR_CBS      0x00000080  /* bit 7 */
#define SPR_ICCFGR_CCRI     0x00000200  /* bit 9 */
#define SPR_ICCFGR_CBIRI    0x00000400  /* bit 10 */
#define SPR_ICCFGR_CBPRI    0x00000800  /* bit 11 */
#define SPR_ICCFGR_CBLRI    0x00001000  /* bit 12 */

/*
 * Bit definitions for Data MMU Configuration Register
 *
 * Three multi-bit fields in bits 7:0 followed by single-bit flags in
 * bits 11:8.
 */

#define SPR_DMMUCFGR_NTW    0x00000003  /* bits 1:0 */
#define SPR_DMMUCFGR_NTS    0x0000001C  /* bits 4:2 */
#define SPR_DMMUCFGR_NAE    0x000000E0  /* bits 7:5 */
#define SPR_DMMUCFGR_CRI    0x00000100  /* bit 8 */
#define SPR_DMMUCFGR_PRI    0x00000200  /* bit 9 */
#define SPR_DMMUCFGR_TEIRI  0x00000400  /* bit 10 */
#define SPR_DMMUCFGR_HTR    0x00000800  /* bit 11 */

/*
 * Bit definitions for Instruction MMU Configuration Register
 *
 * Three multi-bit fields in bits 7:0 followed by single-bit flags in
 * bits 11:8.
 */

#define SPR_IMMUCFGR_NTW    0x00000003  /* bits 1:0 */
#define SPR_IMMUCFGR_NTS    0x0000001C  /* bits 4:2 */
#define SPR_IMMUCFGR_NAE    0x000000E0  /* bits 7:5 */
#define SPR_IMMUCFGR_CRI    0x00000100  /* bit 8 */
#define SPR_IMMUCFGR_PRI    0x00000200  /* bit 9 */
#define SPR_IMMUCFGR_TEIRI  0x00000400  /* bit 10 */
#define SPR_IMMUCFGR_HTR    0x00000800  /* bit 11 */

/*
 * Bit definitions for Debug Control registers
 *
 * Field masks first, then the encoded values each multi-bit field can take
 * (already shifted into position, so they compare directly against
 * `reg & mask`).
 */
#define SPR_DCR_DP  0x00000001  /* DVR/DCR present */
#define SPR_DCR_CC  0x0000000e  /* Compare condition */
#define SPR_DCR_SC  0x00000010  /* Signed compare */
#define SPR_DCR_CT  0x000000e0  /* Compare to */

/* Bit results with SPR_DCR_CC mask */
#define SPR_DCR_CC_MASKED 0x00000000
#define SPR_DCR_CC_EQUAL  0x00000002
#define SPR_DCR_CC_LESS   0x00000004
#define SPR_DCR_CC_LESSE  0x00000006
#define SPR_DCR_CC_GREAT  0x00000008
#define SPR_DCR_CC_GREATE 0x0000000a
#define SPR_DCR_CC_NEQUAL 0x0000000c

/* Bit results with SPR_DCR_CT mask */
#define SPR_DCR_CT_DISABLED 0x00000000
#define SPR_DCR_CT_IFEA     0x00000020
#define SPR_DCR_CT_LEA      0x00000040
#define SPR_DCR_CT_SEA      0x00000060
#define SPR_DCR_CT_LD       0x00000080
#define SPR_DCR_CT_SD       0x000000a0
#define SPR_DCR_CT_LSEA     0x000000c0
#define SPR_DCR_CT_LSD      0x000000e0
/* SPR_DCR_CT_LSD doesn't seem to be implemented anywhere in or1ksim. 2004-1-30 HP */

/*
 * Bit definitions for Debug Mode 1 register
 *
 * Eleven 2-bit chain-watchpoint fields (bits 21:0) followed by three
 * single-bit flags (bits 22..24).
 */
#define SPR_DMR1_CW0    0x00000003  /* Chain watchpoint 0 */
#define SPR_DMR1_CW1    0x0000000c  /* Chain watchpoint 1 */
#define SPR_DMR1_CW2    0x00000030  /* Chain watchpoint 2 */
#define SPR_DMR1_CW3    0x000000c0  /* Chain watchpoint 3 */
#define SPR_DMR1_CW4    0x00000300  /* Chain watchpoint 4 */
#define SPR_DMR1_CW5    0x00000c00  /* Chain watchpoint 5 */
#define SPR_DMR1_CW6    0x00003000  /* Chain watchpoint 6 */
#define SPR_DMR1_CW7    0x0000c000  /* Chain watchpoint 7 */
#define SPR_DMR1_CW8    0x00030000  /* Chain watchpoint 8 */
#define SPR_DMR1_CW9    0x000c0000  /* Chain watchpoint 9 */
#define SPR_DMR1_CW10   0x00300000  /* Chain watchpoint 10 */
#define SPR_DMR1_ST     0x00400000  /* Single-step trace */
#define SPR_DMR1_BT     0x00800000  /* Branch trace */
#define SPR_DMR1_DXFW   0x01000000  /* Disable external force watchpoint */

/*
 * Bit definitions for Debug Mode 2 register
 *
 * Two enable bits followed by three 10-bit fields; together the masks
 * cover all 32 bits without overlap.
 */
#define SPR_DMR2_WCE0   0x00000001  /* Watchpoint counter 0 enable */
#define SPR_DMR2_WCE1   0x00000002  /* Watchpoint counter 1 enable */
#define SPR_DMR2_AWTC   0x00000ffc  /* Assign watchpoints to counters */
#define SPR_DMR2_WGB    0x003ff000  /* Watchpoints generating breakpoint */
#define SPR_DMR2_WBS    0xffc00000  /* Watchpoints generated breakpoint */

/*
 * Bit definitions for Debug watchpoint counter registers
 *
 * Low half-word is the count, high half-word the match value.
 */
#define SPR_DWCR_COUNT  0x0000ffff  /* Count */
#define SPR_DWCR_MATCH  0xffff0000  /* Match */

/*
 * Bit definitions for Debug stop register
 *
 * One bit per exception type, bits 0..13.
 */
#define SPR_DSR_RSTE    0x00000001  /* Reset exception */
#define SPR_DSR_BUSEE   0x00000002  /* Bus error exception */
#define SPR_DSR_DPFE    0x00000004  /* Data Page Fault exception */
#define SPR_DSR_IPFE    0x00000008  /* Insn Page Fault exception */
#define SPR_DSR_TTE     0x00000010  /* Tick Timer exception */
#define SPR_DSR_AE      0x00000020  /* Alignment exception */
#define SPR_DSR_IIE     0x00000040  /* Illegal Instruction exception */
#define SPR_DSR_IE      0x00000080  /* Interrupt exception */
#define SPR_DSR_DME     0x00000100  /* DTLB miss exception */
#define SPR_DSR_IME     0x00000200  /* ITLB miss exception */
#define SPR_DSR_RE      0x00000400  /* Range exception */
#define SPR_DSR_SCE     0x00000800  /* System call exception */
#define SPR_DSR_FPE     0x00001000  /* Floating Point Exception */
#define SPR_DSR_TE      0x00002000  /* Trap exception */

/*
 * Bit definitions for Debug reason register
 *
 * One bit per exception type, bits 0..13, at the same positions as the
 * SPR_DSR_* masks.
 */
#define SPR_DRR_RSTE    0x00000001  /* Reset exception */
#define SPR_DRR_BUSEE   0x00000002  /* Bus error exception */
#define SPR_DRR_DPFE    0x00000004  /* Data Page Fault exception */
#define SPR_DRR_IPFE    0x00000008  /* Insn Page Fault exception */
#define SPR_DRR_TTE     0x00000010  /* Tick Timer exception */
#define SPR_DRR_AE      0x00000020  /* Alignment exception */
#define SPR_DRR_IIE     0x00000040  /* Illegal Instruction exception */
#define SPR_DRR_IE      0x00000080  /* Interrupt exception */
#define SPR_DRR_DME     0x00000100  /* DTLB miss exception */
#define SPR_DRR_IME     0x00000200  /* ITLB miss exception */
#define SPR_DRR_RE      0x00000400  /* Range exception */
#define SPR_DRR_SCE     0x00000800  /* System call exception */
#define SPR_DRR_FPE     0x00001000  /* Floating Point Exception */
#define SPR_DRR_TE      0x00002000  /* Trap exception */

/*
 * Performance counters mode registers (PCMR).
 * Bits 0..14 are single-bit flags; SPR_PCMR_WPE is a multi-bit field
 * occupying bits 25:15.
 */
#define SPR_PCMR_CP     0x00000001  /* bit 0:  counter present */
#define SPR_PCMR_UMRA   0x00000002  /* bit 1:  user mode read access */
#define SPR_PCMR_CISM   0x00000004  /* bit 2:  count in supervisor mode */
#define SPR_PCMR_CIUM   0x00000008  /* bit 3:  count in user mode */
#define SPR_PCMR_LA     0x00000010  /* bit 4:  load access event */
#define SPR_PCMR_SA     0x00000020  /* bit 5:  store access event */
#define SPR_PCMR_IF     0x00000040  /* bit 6:  instruction fetch event */
#define SPR_PCMR_DCM    0x00000080  /* bit 7:  data cache miss event */
#define SPR_PCMR_ICM    0x00000100  /* bit 8:  instruction cache miss event */
#define SPR_PCMR_IFS    0x00000200  /* bit 9:  instruction fetch stall event */
#define SPR_PCMR_LSUS   0x00000400  /* bit 10: LSU stall event */
#define SPR_PCMR_BS     0x00000800  /* bit 11: branch stall event */
#define SPR_PCMR_DTLBM  0x00001000  /* bit 12: DTLB miss event */
#define SPR_PCMR_ITLBM  0x00002000  /* bit 13: ITLB miss event */
#define SPR_PCMR_DDS    0x00004000  /* bit 14: data dependency stall event */
#define SPR_PCMR_WPE    0x03ff8000  /* bits 25:15: watchpoint events */
584*53ee8cc1Swenshuai.xi 
/*
 * Power management register (PMR).
 * A 4-bit slow-down factor in the low nibble, followed by four
 * single-bit mode enables.
 */
#define SPR_PMR_SDF     0x0000000f  /* bits 3:0: slow down factor */
#define SPR_PMR_DME     0x00000010  /* bit 4: doze mode enable */
#define SPR_PMR_SME     0x00000020  /* bit 5: sleep mode enable */
#define SPR_PMR_DCGE    0x00000040  /* bit 6: dynamic clock gating enable */
#define SPR_PMR_SUME    0x00000080  /* bit 7: suspend mode enable */
594*53ee8cc1Swenshuai.xi 
/*
 * PICMR: interrupt unmask field.  The mask covers bits 31:2; the two
 * lowest bits are not part of the field.
 */
#define SPR_PICMR_IUM   0xfffffffc  /* Interrupt unmask */

/*
 * PICPR: interrupt priority field.  Same extent as the PICMR field
 * (bits 31:2).
 */
#define SPR_PICPR_IPRIO 0xfffffffc  /* Interrupt priority */

/*
 * PICSR: interrupt status field.  Covers the whole 32-bit register.
 */
#define SPR_PICSR_IS    0xffffffff  /* Interrupt status */
612*53ee8cc1Swenshuai.xi 
/*
 * Tick timer bit definitions.
 *
 * SPR_TTCR_PERIOD and its alias SPR_TTMR_PERIOD mask the low 28 bits.
 * The remaining SPR_TTMR_* values live in the top four bits:
 * SPR_TTMR_M masks the two-bit mode field (bits 31:30), and RT, SR and
 * CR are the three non-zero encodings inside that field.
 */
#define SPR_TTCR_PERIOD 0x0fffffff          /* bits 27:0: time period */
#define SPR_TTMR_PERIOD SPR_TTCR_PERIOD     /* same mask, TTMR spelling */
#define SPR_TTMR_IP     0x10000000          /* bit 28: interrupt pending */
#define SPR_TTMR_IE     0x20000000          /* bit 29: interrupt enable */
#define SPR_TTMR_RT     0x40000000          /* mode value: restart tick */
#define SPR_TTMR_SR     0x80000000          /* mode value: single run */
#define SPR_TTMR_CR     0xc0000000          /* mode value: continuous run */
#define SPR_TTMR_M      0xc0000000          /* bits 31:30: tick mode mask */

/* TTUR */
#define SPR_TTUR_ICTM   0x00000001          /* bit 0: instruction counting timer mode */
627*53ee8cc1Swenshuai.xi 
/*
 * Constants for the immediate operand of l.nop.
 *
 * NOTE(review): NOP_REPORT_FIRST (0x0400) is greater than
 * NOP_REPORT_LAST (0x03ff), so the interval FIRST..LAST is empty as
 * written - confirm the intended values before relying on it as a range.
 */
#define NOP_NOP          0x0000     /* Normal nop instruction */
#define NOP_EXIT         0x0001     /* End of simulation */
#define NOP_REPORT       0x0002     /* Simple report */
#define NOP_PRINTF       0x0003     /* Simprintf instruction */
#define NOP_CNT_RESET    0x0005     /* Reset statistics counters */
#define NOP_SIM_SYS_CALL 0x0006     /* Simulator syscall emulation */
#define NOP_REPORT_FIRST 0x0400     /* Report with number (first) */
#define NOP_REPORT_LAST  0x03ff     /* Report with number (last) */
640*53ee8cc1Swenshuai.xi 
641*53ee8cc1Swenshuai.xi 
/*
 * HAZARD(x): wrapper placed around filler statements (this header uses
 * it around an "l.nop" asm).  Unless the build has already defined
 * HAZARD, it expands to its argument unchanged.  The argument is a
 * statement fragment, so it is deliberately not parenthesized.
 */
#if !defined(HAZARD)
#define HAZARD(x)       x
#endif
645*53ee8cc1Swenshuai.xi 
/*
 * L_FF1(x) / L_FL1(x): issue the l.ff1 / l.fl1 instruction on x and
 * yield the int written to the destination register.  x is evaluated
 * once into an int temporary.  Uses GCC statement expressions.
 */
#define L_FF1(x)                                            \
({                                                          \
    int _res_, _x_ = x;                                     \
    __asm__ volatile ("l.ff1\t%0,%1"                        \
                      : "=r"(_res_)                         \
                      : "r"(_x_));                          \
    _res_;                                                  \
})

#define L_FL1(x)                                            \
({                                                          \
    int _res_, _x_ = x;                                     \
    __asm__ volatile ("l.fl1\t%0,%1"                        \
                      : "=r"(_res_)                         \
                      : "r"(_x_));                          \
    _res_;                                                  \
})
651*53ee8cc1Swenshuai.xi 
/*
 * Raw special-purpose-register (SPR) access primitives.
 *
 *   __l_mfspr__(result, spr_grp, spr_reg)  l.mfspr result, spr_reg, spr_grp
 *   __l_mfspr1__(result, spr)              l.mfspr result, r0, spr
 *   __l_mtspr__(spr_grp, spr_reg, val)     l.mtspr spr_reg, val, spr_grp
 *   __l_mtspr1__(spr, val)                 l.mtspr r0, val, spr
 *
 * spr_grp and spr use the "K" constraint, so they must be compile-time
 * constants; spr_reg and val are passed in registers.
 *
 * NOTE(review): __l_mfspr__ and __l_mtspr__ end with their own ';' while
 * the *1 forms do not.  The difference is preserved here because callers
 * may depend on it.
 */
#define __l_mfspr__(result, spr_grp, spr_reg)               \
    __asm__ volatile ("l.mfspr\t%0,%1,%2"                   \
                      : "=r"(result)                        \
                      : "r"(spr_reg), "K"(spr_grp));
#define __l_mfspr1__(result, spr)                           \
    __asm__ volatile ("l.mfspr\t%0,r0,%1"                   \
                      : "=r"(result)                        \
                      : "K"(spr))
#define __l_mtspr__(spr_grp, spr_reg, val)                  \
    __asm__ volatile ("l.mtspr\t%0,%1,%2"                   \
                      :                                     \
                      : "r"(spr_reg), "r"(val), "K"(spr_grp));
#define __l_mtspr1__(spr,val)                               \
    __asm__ volatile ("l.mtspr\tr0,%0,%1"                   \
                      :                                     \
                      : "r"(val), "K"(spr))
661*53ee8cc1Swenshuai.xi 
/*
 * Read an SPR and yield its value as an int.
 * L_MFSPR takes the group base (5-bit group number shifted left by 11,
 * a constant) plus a register offset; L_MFSPR1 takes one constant
 * holding the full 16-bit SPR number.
 */
#define L_MFSPR(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_) \
({                                                          \
    int _res_;                                              \
    __l_mfspr__(_res_, _spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_); \
    _res_;                                                  \
})

#define L_MFSPR1(_spr_register_16bit_const_)                \
({                                                          \
    int _res_;                                              \
    __l_mfspr1__(_res_, _spr_register_16bit_const_);        \
    _res_;                                                  \
})

/*
 * Write _val_ to an SPR.  Addressing mirrors L_MFSPR / L_MFSPR1.
 * These expand directly to the underlying primitive and yield no value.
 */
#define L_MTSPR(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_, _val_) \
    __l_mtspr__(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_, _val_)

#define L_MTSPR1(_spr_register_16bit_const_, _val_)         \
    __l_mtspr1__(_spr_register_16bit_const_, _val_)
674*53ee8cc1Swenshuai.xi 
/*
 * Multiply primitives (enhancement to the existing multiply: the full
 * 64-bit product is kept).  Each emits one instruction and stores the
 * destination register into `lo`.
 *   __l_mul__   l.mul,  operands staged as int
 *   __l_muli__  l.muli, b is an "I"-constraint immediate (constant)
 *   __l_mulu__  l.mulu, operands staged as unsigned
 */
#define __l_mul__(lo, a,b)                                  \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.mul\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); \
}
#define __l_muli__(lo, a,b)                                 \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.muli\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "I"(b)); \
}
#define __l_mulu__(lo, a,b)                                 \
{                                                           \
    unsigned _a_ = (a), _b_ = (b);                          \
    __asm__ volatile ("l.mulu\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); \
}
682*53ee8cc1Swenshuai.xi 
/*
 * 32x32 -> 64-bit multiplies.
 *
 * The full-width forms run the multiply, read the upper word back from
 * SPR_MACHI2, and combine the two 32-bit halves.  The combination is
 * done in unsigned arithmetic and only then converted to the result
 * type: left-shifting a negative signed value is undefined in ISO C,
 * which the previous `(long long)_hi_ << 32` did whenever the high word
 * was negative.  The resulting bit pattern is unchanged.
 *
 * The *_LO forms yield only the low 32 bits of the product.
 */

/* signed 32x32 to signed 64-bit multiply */
#define L_MULL(a,b)                         \
({  int _lo_; __l_mul__(_lo_, a,b);         \
    int _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
    (long long)(((unsigned long long)(unsigned)_hi_ << 32) | (unsigned)_lo_); })

#define L_MULL_LO(a,b)                      \
({  int _lo_; __l_mul__(_lo_, a,b); _lo_; })

/* signed 32 x immediate to signed 64-bit multiply (b must be a constant) */
#define L_MULLI(a,b)                        \
({  int _lo_; __l_muli__(_lo_, a,b);        \
    int _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
    (long long)(((unsigned long long)(unsigned)_hi_ << 32) | (unsigned)_lo_); })

#define L_MULLI_LO(a,b)                     \
({  int _lo_; __l_muli__(_lo_, a,b); _lo_; })

/* unsigned 32x32 to unsigned 64-bit multiply */
#define L_MULLU(a,b)                        \
({  unsigned _lo_; __l_mulu__(_lo_, a,b);   \
    unsigned _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
    ((unsigned long long)_hi_<<32)|(unsigned)_lo_; })

/* NOTE(review): yields int although the multiply is unsigned; kept so
 * the type seen by existing callers does not change. */
#define L_MULLU_LO(a,b)                     \
({  int _lo_; __l_mulu__(_lo_, a,b); _lo_; })
707*53ee8cc1Swenshuai.xi 
708*53ee8cc1Swenshuai.xi 
/*
 * Pipelined 32x32 multiply primitives: emit l.amull / l.amullu and
 * store the destination register into `lo`.  The unsigned form stages
 * its operands in unsigned temporaries (as __l_mulu__ does) so that
 * values above INT_MAX are not converted through int first.
 */
#define __l_amull__(lo, a,b)                \
   { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amull\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
#define __l_amullu__(lo, a,b)                \
   { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.amullu\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
713*53ee8cc1Swenshuai.xi 
/*
 * Pipelined 32x32 -> 64-bit multiplies.
 *
 * The full-width forms run the multiply and read the upper word back
 * from SPR_AMULHI.  The two halves are combined in unsigned arithmetic
 * before conversion to the result type, because left-shifting a
 * negative signed value (a negative high word) is undefined in ISO C.
 * The resulting bit pattern is unchanged.
 *
 * The *_LO forms yield only the low 32 bits of the product.
 */

/* Pipelined version of signed 32x32 multiply to 64-bit */
#define L_AMULL(a,b)                        \
({  int _lo_; __l_amull__(_lo_, a,b);       \
    int _hi_; __l_mfspr1__(_hi_, SPR_AMULHI); \
    (long long)(((unsigned long long)(unsigned)_hi_ << 32) | (unsigned)_lo_); })

#define L_AMULL_LO(a,b)                     \
({  int _lo_; __l_amull__(_lo_, a,b); _lo_; })

/* Pipelined version of unsigned 32x32 multiply to 64-bit  */
#define L_AMULLU(a,b)                       \
({  unsigned _lo_; __l_amullu__(_lo_, a,b); \
    unsigned _hi_; __l_mfspr1__(_hi_, SPR_AMULHI); \
    ((unsigned long long)_hi_<<32)|(unsigned)_lo_; })

#define L_AMULLU_LO(a,b)                    \
({  unsigned _lo_; __l_amullu__(_lo_, a,b); _lo_; })
731*53ee8cc1Swenshuai.xi 
732*53ee8cc1Swenshuai.xi 
/*
 * Accumulator (a0/a1) and q-register (q0/q1) selectors for the AMAC
 * macros.  The AMAC primitives paste these into the instruction text
 * with the # operator, so each must expand to a bare digit - do not
 * add parentheses or suffixes.
 */
#define AMAC0           0
#define AMAC1           1
#define AMAC_Q0         0
#define AMAC_Q1         1
737*53ee8cc1Swenshuai.xi 
/*
 * Signed AMAC primitives.  `m` selects accumulator a<m> and is pasted
 * into the instruction text, so it must be a literal digit by the time
 * it reaches these macros.  The *q forms take the second operand from
 * register q<q> instead of a general register.  None of them has an
 * output operand.
 */
#define __l_amul__(m, a,b)                                  \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amul\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amac__(m, a,b)                                  \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amac\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amsb__(m, a,b)                                  \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amsb\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amulq__(m, a,q)                                 \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amulq\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
#define __l_amacq__(m, a,q)                                 \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amacq\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
#define __l_amsbq__(m, a,q)                                 \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amsbq\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
750*53ee8cc1Swenshuai.xi 
/*
 * Unsigned AMAC primitives; operands are staged as unsigned.  `m` (and
 * `q`) are pasted into the instruction text and must be literal digits.
 * Note the spelling: the macros are named ...uq__ while the emitted
 * mnemonics are l.amulqu / l.amacqu / l.amsbqu.
 */
#define __l_amulu__(m, a,b)                                 \
{                                                           \
    unsigned _a_ = (a), _b_ = (b);                          \
    __asm__ volatile ("l.amulu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amacu__(m, a,b)                                 \
{                                                           \
    unsigned _a_ = (a), _b_ = (b);                          \
    __asm__ volatile ("l.amacu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amsbu__(m, a,b)                                 \
{                                                           \
    unsigned _a_ = (a), _b_ = (b);                          \
    __asm__ volatile ("l.amsbu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); \
}
#define __l_amuluq__(m, a,q)                                \
{                                                           \
    unsigned _a_ = (a);                                     \
    __asm__ volatile ("l.amulqu\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
#define __l_amacuq__(m, a,q)                                \
{                                                           \
    unsigned _a_ = (a);                                     \
    __asm__ volatile ("l.amacqu\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
#define __l_amsbuq__(m, a,q)                                \
{                                                           \
    unsigned _a_ = (a);                                     \
    __asm__ volatile ("l.amsbqu\ta" #m ",%0,q" #q : : "r"(_a_)); \
}
763*53ee8cc1Swenshuai.xi 
/*
 * "_rs" AMAC primitives: same operand scheme as the plain signed
 * primitives, plus a destination register stored into `lo` and a
 * constant `bits` operand ("K" constraint).  `m` and `q` are pasted
 * into the instruction text and must be literal digits.
 */
#define __l_amul_rs__(lo, m,a,b, bits)                      \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amul_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); \
}
#define __l_amac_rs__(lo, m,a,b, bits)                      \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amac_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); \
}
#define __l_amsb_rs__(lo, m,a,b, bits)                      \
{                                                           \
    int _a_ = (a), _b_ = (b);                               \
    __asm__ volatile ("l.amsb_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); \
}
#define __l_amulq_rs__(lo, m,a,q, bits)                     \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amulq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); \
}
#define __l_amacq_rs__(lo, m,a,q, bits)                     \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amacq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); \
}
#define __l_amsbq_rs__(lo, m,a,q, bits)                     \
{                                                           \
    int _a_ = (a);                                          \
    __asm__ volatile ("l.amsbq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); \
}
776*53ee8cc1Swenshuai.xi 
/*
 * Accumulator read-back primitives.  `m` is pasted into the asm text
 * and also used in constant arithmetic, so it must be a literal digit.
 *   __l_amacr_lo__         l.amacr on a<m> with constant mode and bits
 *                          operands; destination register goes to `lo`
 *   __l_mfspr_amachi__     l.mfspr of SPR_AMACHI0 + 2*m into `hi`
 *   __l_mfspr_amacguard__  l.mfspr of SPR_AMACGUARD0 + m into `g`
 */
#define __l_amacr_lo__(m, lo, mode, bits)                   \
    __asm__ volatile ("l.amacr\t%0,a" #m ",%1,%2\t# CLR|RND|SHT|SAT" \
                      : "=r"(lo)                            \
                      : "K"(mode), "K"(bits))
#define __l_mfspr_amachi__(m, hi)                           \
    __asm__ volatile ("l.mfspr\t%0,r0,%1\t# amac\ta" #m "hi" \
                      : "=r"(hi)                            \
                      : "K"(SPR_AMACHI0+2*m))
#define __l_mfspr_amacguard__(m, g)                         \
    __asm__ volatile ("l.mfspr\t%0,r0,%1\t# amac\ta" #m "g" \
                      : "=r"(g)                             \
                      : "K"(SPR_AMACGUARD0+m))
783*53ee8cc1Swenshuai.xi 
/*
 * Public AMAC wrappers.  Going through these (rather than the __l_*__
 * primitives) lets `m` / `q` be given as AMAC0, AMAC_Q1, ...: the
 * argument is macro-expanded here before the primitive stringifies it.
 * The *_RS_LO forms are statement expressions yielding the int written
 * by the corresponding "_rs" instruction.
 */

/* Pipelined 32x32 multiply into a 72-bit MAC */
#define L_AMUL(m, a,b)              __l_amul__(m, a,b)
#define L_AMULU(m, a,b)             __l_amulu__(m, a,b)
#define L_AMUL_RS_LO(m, a,b, bits)                          \
({  int _lo_; __l_amul_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined 32x32 multiply accumulated into a 72-bit MAC */
#define L_AMAC(m, a,b)              __l_amac__(m, a,b)
#define L_AMACU(m, a,b)             __l_amacu__(m, a,b)
#define L_AMAC_RS_LO(m, a,b, bits)                          \
({  int _lo_; __l_amac_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined 32x32 multiply subtracted from a 72-bit MAC */
#define L_AMSB(m, a,b)              __l_amsb__(m, a,b)
#define L_AMSBU(m, a,b)             __l_amsbu__(m, a,b)
#define L_AMSB_RS_LO(m, a,b, bits)                          \
({  int _lo_; __l_amsb_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined 32x32 register * amacq multiply into a 72-bit MAC */
#define L_AMULQ(m, a,q)             __l_amulq__(m, a,q)
#define L_AMULUQ(m, a,q)            __l_amuluq__(m, a,q)
#define L_AMULQ_RS_LO(m, a,q, bits)                         \
({  int _lo_; __l_amulq_rs__(_lo_, m,a,q, bits); _lo_; })

/* Pipelined 32x32 register * amacq multiply accumulated into a 72-bit MAC */
#define L_AMACQ(m, a,q)             __l_amacq__(m, a,q)
#define L_AMACUQ(m, a,q)            __l_amacuq__(m, a,q)
#define L_AMACQ_RS_LO(m, a,q, bits)                         \
({  int _lo_; __l_amacq_rs__(_lo_, m,a,q, bits); _lo_; })

/* Pipelined 32x32 register * amacq multiply subtracted from a 72-bit MAC */
#define L_AMSBQ(m, a,q)             __l_amsbq__(m, a,q)
#define L_AMSBUQ(m, a,q)            __l_amsbuq__(m, a,q)
#define L_AMSBQ_RS_LO(m, a,q, bits)                         \
({  int _lo_; __l_amsbq_rs__(_lo_, m,a,q, bits); _lo_; })
813*53ee8cc1Swenshuai.xi 
/*
 * Pipelined version of setting a 72-bit MAC.
 *
 *   L_AMACW(m, hi, lo)  l.amacw a<m>, hi, lo   (hi/lo staged as int)
 *   L_AMACW0(m)         l.amacw a<m>, r0, r0
 *
 * The accumulator index is pasted into the instruction text with #.
 * Because # suppresses macro expansion of its operand, the pasting is
 * done one level down in the __l_amacw*__ helpers; that way
 * L_AMACW(AMAC0, ...) produces "a0" rather than "aAMAC0".  Literal
 * digits keep working as before.
 */
#define __l_amacw__(m, hi, lo)              \
   { int _hi_ = (hi), _lo_ = (lo); __asm__ volatile ("l.amacw\ta" #m ",%0,%1" : : "r"(_hi_), "r"(_lo_)); }
#define __l_amacw0__(m)                     \
   { __asm__ volatile ("l.amacw\ta" #m ",r0,r0"); }

#define L_AMACW(m, hi, lo)  __l_amacw__(m, hi, lo)
#define L_AMACW0(m)         __l_amacw0__(m)
819*53ee8cc1Swenshuai.xi 
/*
 * Mode flags for the pipelined read of a 72-bit MAC (the `mode` operand
 * of the l.amacr based macros).  The operations are optional and are
 * applied in the order round, shift, saturate, clear.  The flags are
 * distinct bits.
 */
#define AMACR_CLR   8   /* clear */
#define AMACR_RND   4   /* round */
#define AMACR_SAT   2   /* saturate */
#define AMACR_SHR   1   /* shift */
825*53ee8cc1Swenshuai.xi 
826*53ee8cc1Swenshuai.xi #if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both case, _hi_ isn't what we want
827*53ee8cc1Swenshuai.xi #define L_AMACR(m, mode, bits)              \
828*53ee8cc1Swenshuai.xi ({  int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
829*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
830*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
831*53ee8cc1Swenshuai.xi #endif // 0
832*53ee8cc1Swenshuai.xi 
/*
 * Read accumulator a<m> with l.amacr using the constant `mode`
 * (AMACR_* flags) and `bits` operands; yields the int written to the
 * destination register.
 */
#define L_AMACR_LO(m, mode, bits)                           \
({                                                          \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})
835*53ee8cc1Swenshuai.xi 
/*
 * Read the high word (L_MFSPR_AMACHI) or the guard register
 * (L_MFSPR_GUARD) of accumulator `m` via l.mfspr and yield it as int.
 *
 * The statement expressions previously ended in `_hi` / `_g`, which are
 * not the temporaries declared here (`_hi_` / `_g_`), so every use
 * failed to compile or picked up an unrelated caller variable.
 */
#define L_MFSPR_AMACHI(m)                   \
({  int _hi_; __l_mfspr_amachi__(m, _hi_); _hi_; })

#define L_MFSPR_GUARD(m)                    \
({  int _g_; __l_mfspr_amacguard__(m, _g_); _g_; })
841*53ee8cc1Swenshuai.xi 
842*53ee8cc1Swenshuai.xi 
/*
 * Double-load.
 *
 * __l_ldww__ emits the mnemonic given as the string `ldww` with the
 * operand form  r, offset(addr), q<q>, mask.  `offset` ("I") and `mask`
 * ("K") must be constants, `q` is pasted into the text and must be a
 * literal digit, and the destination register is stored into `r`.
 * The primitive carries its own trailing ';'.
 *
 * The L_LDWW* wrappers select the mnemonic and yield the int result.
 */
#define __l_ldww__(ldww, q, mask, r, addr, offset)          \
    __asm__ volatile (ldww "\t%0,%1(%2),q" #q ",%3"         \
                      : "=r"(r)                             \
                      : "I"(offset), "r"(addr), "K"(mask));

#define L_LDWW(q, mask, addr, offset)                       \
({                                                          \
    int _r_;                                                \
    __l_ldww__("l.ldww", q, mask, _r_, addr, offset);       \
    _r_;                                                    \
})

#define L_LDWWX(q, mask, addr, offset)                      \
({                                                          \
    int _r_;                                                \
    __l_ldww__("l.ldwwx", q, mask, _r_, addr, offset);      \
    _r_;                                                    \
})

#define L_LDWW2(q, mask, addr, offset)                      \
({                                                          \
    int _r_;                                                \
    __l_ldww__("l.ldww2", q, mask, _r_, addr, offset);      \
    _r_;                                                    \
})

#define L_LDWW2X(q, mask, addr, offset)                     \
({                                                          \
    int _r_;                                                \
    __l_ldww__("l.ldww2x", q, mask, _r_, addr, offset);     \
    _r_;                                                    \
})
858*53ee8cc1Swenshuai.xi 
859*53ee8cc1Swenshuai.xi 
860*53ee8cc1Swenshuai.xi /* AMAC combo */
861*53ee8cc1Swenshuai.xi #if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both case, _hi_ isn't what we want
862*53ee8cc1Swenshuai.xi #define L_AMAC_p(m, a1,b1, mode, bits)      \
863*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
864*53ee8cc1Swenshuai.xi     HAZARD(__asm__ volatile ("l.nop"));     \
865*53ee8cc1Swenshuai.xi     int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
866*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
867*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
868*53ee8cc1Swenshuai.xi 
869*53ee8cc1Swenshuai.xi #define L_AMACQ_p(m, a1, q, mode, bits)     \
870*53ee8cc1Swenshuai.xi ({  __l_amulq__(m, a1,q);                   \
871*53ee8cc1Swenshuai.xi     HAZARD(__asm__ volatile ("l.nop"));     \
872*53ee8cc1Swenshuai.xi     int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
873*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
874*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
875*53ee8cc1Swenshuai.xi 
876*53ee8cc1Swenshuai.xi #define L_AMAC_pp(m, a1,b1, a2,b2, mode, bits) \
877*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
878*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
879*53ee8cc1Swenshuai.xi     HAZARD(__asm__ volatile ("l.nop"));     \
880*53ee8cc1Swenshuai.xi     int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
881*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
882*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
883*53ee8cc1Swenshuai.xi #endif // 0
884*53ee8cc1Swenshuai.xi 
/*
 * AMAC combos, one and two terms, low word only.
 *
 * Naming: each letter after "AMAC_" is one term on accumulator `m`:
 *   p  product added     (first term uses amul, later ones amac)
 *   m  product subtracted (amsb)
 *   q  second operand taken from a q register
 * After the last term a HAZARD-wrapped l.nop is issued, then the
 * accumulator is read with l.amacr (`mode`, `bits`) and the macro
 * yields that value.
 */
#define L_AMAC_p_LO(m, a1,b1, mode, bits)                   \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMACQ_p_LO(m, a1, q, mode, bits)                  \
({                                                          \
    __l_amulq__(m, a1,q);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pp_LO(m, a1,b1, a2,b2, mode, bits)           \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

/* Unsigned two-term sum; yields unsigned. */
#define L_AMACU_pp_LO(m, a1,b1, a2,b2, mode, bits)          \
({                                                          \
    __l_amulu__(m, a1,b1);                                  \
    __l_amacu__(m, a2,b2);                                  \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    unsigned _lo_;                                          \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pq_LO(m, a1,b1, a2,q2, mode, bits)           \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amacq__(m, a2,q2);                                  \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pm_LO(m, a1,b1, a2,b2, mode, bits)           \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amsb__(m, a2,b2);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})
918*53ee8cc1Swenshuai.xi 
/*
 * AMAC combos, three terms, low word only.
 * p = product added (amul for the first term, amac afterwards),
 * m = product subtracted (amsb).  A HAZARD-wrapped l.nop precedes the
 * l.amacr read whose value the macro yields.
 */
#define L_AMAC_ppp_LO(m, a1,b1, a2,b2, a3,b3, mode, bits)   \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_ppm_LO(m, a1,b1, a2,b2, a3,b3, mode, bits)   \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amsb__(m, a3,b3);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pmm_LO(m, a1,b1, a2,b2, a3,b3, mode, bits)   \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amsb__(m, a2,b2);                                   \
    __l_amsb__(m, a3,b3);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})
939*53ee8cc1Swenshuai.xi 
/*
 * AMAC combos, four terms, low word only.
 * p = product added (amul for the first term, amac afterwards),
 * m = product subtracted (amsb).  A HAZARD-wrapped l.nop precedes the
 * l.amacr read whose value the macro yields.
 */
#define L_AMAC_pppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    __l_amac__(m, a4,b4);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_ppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amsb__(m, a3,b3);                                   \
    __l_amsb__(m, a4,b4);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pmpm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amsb__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    __l_amsb__(m, a4,b4);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pmmp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amsb__(m, a2,b2);                                   \
    __l_amsb__(m, a3,b3);                                   \
    __l_amac__(m, a4,b4);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})
971*53ee8cc1Swenshuai.xi 
/*
 * AMAC combos, five and ten terms, low word only.
 * p = product added (amul for the first term, amac afterwards),
 * m = product subtracted (amsb).  A HAZARD-wrapped l.nop precedes the
 * l.amacr read whose value the macro yields.
 */
#define L_AMAC_ppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    __l_amac__(m, a4,b4);                                   \
    __l_amac__(m, a5,b5);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    __l_amsb__(m, a4,b4);                                   \
    __l_amsb__(m, a5,b5);                                   \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})

#define L_AMAC_pppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, a9,b9, a10,b10, mode, bits) \
({                                                          \
    __l_amul__(m, a1,b1);                                   \
    __l_amac__(m, a2,b2);                                   \
    __l_amac__(m, a3,b3);                                   \
    __l_amac__(m, a4,b4);                                   \
    __l_amac__(m, a5,b5);                                   \
    __l_amac__(m, a6,b6);                                   \
    __l_amac__(m, a7,b7);                                   \
    __l_amac__(m, a8,b8);                                   \
    __l_amac__(m, a9,b9);                                   \
    __l_amac__(m, a10,b10);                                 \
    HAZARD(__asm__ volatile ("l.nop"));                     \
    int _lo_;                                               \
    __l_amacr_lo__(m, _lo_, mode, bits);                    \
    _lo_;                                                   \
})
1003*53ee8cc1Swenshuai.xi 
1004*53ee8cc1Swenshuai.xi /* AMAC combo: 1 instruction short, always shift-rounding */
/*
 * Disabled 64-bit variants (compiled out by "#if 0").
 * Each one obtains _lo_ from a *_rs__ helper, reads _hi_ through
 * __l_mfspr_amachi__(m, _hi_), and combines them as
 * ((long long)_hi_ << 32) | (unsigned)_lo_.
 * The comment on the #if line records why they were switched off.
 */
1005*53ee8cc1Swenshuai.xi #if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both cases, _hi_ isn't what we want
1006*53ee8cc1Swenshuai.xi #define L_AMAC_RS_p(m, a1,b1, bits)         \
1007*53ee8cc1Swenshuai.xi ({  int _lo_; __l_amul_rs__(_lo_, m,a1,b1, bits); \
1008*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
1009*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
1010*53ee8cc1Swenshuai.xi 
1011*53ee8cc1Swenshuai.xi #define L_AMACQ_RS_p(m, a1, q, bits)        \
1012*53ee8cc1Swenshuai.xi ({  int _lo_; __l_amulq_rs__(_lo_, m,a1,q, bits); \
1013*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
1014*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
1015*53ee8cc1Swenshuai.xi 
1016*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pp(m, a1,b1, a2,b2, bits) \
1017*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1018*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a2,b2, bits); \
1019*53ee8cc1Swenshuai.xi     int _hi_; __l_mfspr_amachi__(m, _hi_);  \
1020*53ee8cc1Swenshuai.xi     ((long long)_hi_<<32)|(unsigned)_lo_; })
1021*53ee8cc1Swenshuai.xi #endif // 0
1022*53ee8cc1Swenshuai.xi 
/*
 * Single-product forms.  Each is a statement expression that declares an
 * int _lo_, lets one helper fill it, and yields it.  `m` and `bits` are
 * forwarded unchanged; the helpers are defined outside this section.
 */
/* L_AMAC_RS_p_LO: value produced by __l_amul_rs__(_lo_, m, a1, b1, bits). */
1023*53ee8cc1Swenshuai.xi #define L_AMAC_RS_p_LO(m, a1,b1, bits)      \
1024*53ee8cc1Swenshuai.xi ({  int _lo_; __l_amul_rs__(_lo_, m,a1,b1, bits); _lo_; })
1025*53ee8cc1Swenshuai.xi 
/* L_AMACQ_RS_p_LO: value produced by __l_amulq_rs__(_lo_, m, a1, q, bits). */
1026*53ee8cc1Swenshuai.xi #define L_AMACQ_RS_p_LO(m, a1, q, bits)     \
1027*53ee8cc1Swenshuai.xi ({  int _lo_; __l_amulq_rs__(_lo_, m,a1,q, bits); _lo_; })
1028*53ee8cc1Swenshuai.xi 
/*
 * Two-term forms.  The first pair goes through a plain multiply helper; the
 * second pair goes through a *_rs__ helper that also fills the int _lo_
 * yielded by the statement expression.  `m` and `bits` are forwarded as-is.
 * NOTE(review): the q/n letters select the *q helpers (__l_amulq__,
 * __l_amacq_rs__, __l_amsbq_rs__); what distinguishes a "q" operand is not
 * visible in this section.
 */
/* pp: __l_amul__ then __l_amac_rs__. */
1029*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pp_LO(m, a1,b1, a2,b2, bits) \
1030*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1031*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a2,b2, bits); _lo_; })
1032*53ee8cc1Swenshuai.xi 
/* pq: __l_amul__ then __l_amacq_rs__. */
1033*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pq_LO(m, a1,b1, a2,q2, bits) \
1034*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1035*53ee8cc1Swenshuai.xi     int _lo_; __l_amacq_rs__(_lo_, m,a2,q2, bits); _lo_; })
1036*53ee8cc1Swenshuai.xi 
/* qq: __l_amulq__ then __l_amacq_rs__. */
1037*53ee8cc1Swenshuai.xi #define L_AMAC_RS_qq_LO(m, a1,q1, a2,q2, bits) \
1038*53ee8cc1Swenshuai.xi ({  __l_amulq__(m, a1,q1);                  \
1039*53ee8cc1Swenshuai.xi     int _lo_; __l_amacq_rs__(_lo_, m,a2,q2, bits); _lo_; })
1040*53ee8cc1Swenshuai.xi 
/* pm: __l_amul__ then __l_amsb_rs__. */
1041*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pm_LO(m, a1,b1, a2,b2, bits) \
1042*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1043*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a2,b2, bits); _lo_; })
1044*53ee8cc1Swenshuai.xi 
/* qn: __l_amulq__ then __l_amsbq_rs__. */
1045*53ee8cc1Swenshuai.xi #define L_AMAC_RS_qn_LO(m, a1,q1, a2,q2, bits) \
1046*53ee8cc1Swenshuai.xi ({  __l_amulq__(m, a1,q1);                   \
1047*53ee8cc1Swenshuai.xi     int _lo_; __l_amsbq_rs__(_lo_, m,a2,q2, bits); _lo_; })
1048*53ee8cc1Swenshuai.xi 
/*
 * Three-term forms.  Pair 1 uses __l_amul__, pair 2 uses __l_amac__ or
 * __l_amsb__, and pair 3 uses the matching *_rs__ helper, which fills the
 * int _lo_ yielded by the statement expression.
 */
/* ppp: amul, amac, amac_rs. */
1049*53ee8cc1Swenshuai.xi #define L_AMAC_RS_ppp_LO(m, a1,b1, a2,b2, a3,b3, bits) \
1050*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1051*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1052*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a3,b3, bits); _lo_; })
1053*53ee8cc1Swenshuai.xi 
/* ppm: amul, amac, amsb_rs. */
1054*53ee8cc1Swenshuai.xi #define L_AMAC_RS_ppm_LO(m, a1,b1, a2,b2, a3,b3, bits) \
1055*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1056*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1057*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a3,b3, bits); _lo_; })
1058*53ee8cc1Swenshuai.xi 
/* pmm: amul, amsb, amsb_rs. */
1059*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pmm_LO(m, a1,b1, a2,b2, a3,b3, bits) \
1060*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1061*53ee8cc1Swenshuai.xi     __l_amsb__(m, a2,b2);                   \
1062*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a3,b3, bits); _lo_; })
1063*53ee8cc1Swenshuai.xi 
/*
 * Four-term forms.  Pair 1 uses __l_amul__, pairs 2-3 use __l_amac__ or
 * __l_amsb__ in the order spelled by the name, and pair 4 uses the *_rs__
 * helper that fills the int _lo_ yielded by the statement expression.
 */
/* pppp: amul, amac, amac, amac_rs. */
1064*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
1065*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1066*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1067*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1068*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a4,b4, bits); _lo_; })
1069*53ee8cc1Swenshuai.xi 
/* ppmm: amul, amac, amsb, amsb_rs. */
1070*53ee8cc1Swenshuai.xi #define L_AMAC_RS_ppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
1071*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1072*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1073*53ee8cc1Swenshuai.xi     __l_amsb__(m, a3,b3);                   \
1074*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a4,b4, bits); _lo_; })
1075*53ee8cc1Swenshuai.xi 
/* pmpm: amul, amsb, amac, amsb_rs. */
1076*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pmpm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
1077*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1078*53ee8cc1Swenshuai.xi     __l_amsb__(m, a2,b2);                   \
1079*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1080*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a4,b4, bits); _lo_; })
1081*53ee8cc1Swenshuai.xi 
/* pmmp: amul, amsb, amsb, amac_rs. */
1082*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pmmp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
1083*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1084*53ee8cc1Swenshuai.xi     __l_amsb__(m, a2,b2);                   \
1085*53ee8cc1Swenshuai.xi     __l_amsb__(m, a3,b3);                   \
1086*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a4,b4, bits); _lo_; })
1087*53ee8cc1Swenshuai.xi 
/*
 * Five-term forms.  Pair 1 uses __l_amul__, pairs 2-4 use __l_amac__ or
 * __l_amsb__, and pair 5 uses the *_rs__ helper that fills the int _lo_
 * yielded by the statement expression.
 */
/* ppppp: amul, amac x3, amac_rs. */
1088*53ee8cc1Swenshuai.xi #define L_AMAC_RS_ppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, bits) \
1089*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1090*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1091*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1092*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1093*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a5,b5, bits); _lo_; })
1094*53ee8cc1Swenshuai.xi 
/* pppmm: amul, amac x2, amsb, amsb_rs. */
1095*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, bits) \
1096*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1097*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1098*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1099*53ee8cc1Swenshuai.xi     __l_amsb__(m, a4,b4);                   \
1100*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a5,b5, bits); _lo_; })
1101*53ee8cc1Swenshuai.xi 
/*
 * All-"p" forms with 6, 8 and 10 operand pairs.  Each starts with
 * __l_amul__ on (a1,b1), applies __l_amac__ to the middle pairs, and
 * finishes with __l_amac_rs__ on the last pair, which fills the int _lo_
 * yielded by the statement expression.
 */
/* Six pairs. */
1102*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, bits) \
1103*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1104*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1105*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1106*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1107*53ee8cc1Swenshuai.xi     __l_amac__(m, a5,b5);                   \
1108*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a6,b6, bits); _lo_; })
1109*53ee8cc1Swenshuai.xi 
/* Eight pairs. */
1110*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, bits) \
1111*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1112*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1113*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1114*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1115*53ee8cc1Swenshuai.xi     __l_amac__(m, a5,b5);                   \
1116*53ee8cc1Swenshuai.xi     __l_amac__(m, a6,b6);                   \
1117*53ee8cc1Swenshuai.xi     __l_amac__(m, a7,b7);                   \
1118*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a8,b8, bits); _lo_; })
1119*53ee8cc1Swenshuai.xi 
/* Ten pairs. */
1120*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, a9,b9, a10,b10, bits) \
1121*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1122*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1123*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1124*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1125*53ee8cc1Swenshuai.xi     __l_amac__(m, a5,b5);                   \
1126*53ee8cc1Swenshuai.xi     __l_amac__(m, a6,b6);                   \
1127*53ee8cc1Swenshuai.xi     __l_amac__(m, a7,b7);                   \
1128*53ee8cc1Swenshuai.xi     __l_amac__(m, a8,b8);                   \
1129*53ee8cc1Swenshuai.xi     __l_amac__(m, a9,b9);                   \
1130*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a10,b10, bits); _lo_; })
1131*53ee8cc1Swenshuai.xi 
/*
 * L_AMAC_RS_ppppppppmmmmmmmm_LO: sixteen operand pairs.
 * __l_amul__ on (a1,b1), __l_amac__ on (a2,b2)..(a8,b8), __l_amsb__ on
 * (a9,b9)..(a15,b15), then __l_amsb_rs__ on (a16,b16), which fills the int
 * _lo_ yielded by the statement expression.  `m` and `bits` are forwarded
 * unchanged to the helpers (defined outside this section).
 */
1132*53ee8cc1Swenshuai.xi #define L_AMAC_RS_ppppppppmmmmmmmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, \
1133*53ee8cc1Swenshuai.xi                             a9,b9, a10,b10, a11,b11, a12,b12, a13,b13, a14,b14, a15,b15, a16,b16, bits) \
1134*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1135*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1136*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1137*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1138*53ee8cc1Swenshuai.xi     __l_amac__(m, a5,b5);                   \
1139*53ee8cc1Swenshuai.xi     __l_amac__(m, a6,b6);                   \
1140*53ee8cc1Swenshuai.xi     __l_amac__(m, a7,b7);                   \
1141*53ee8cc1Swenshuai.xi     __l_amac__(m, a8,b8);                   \
1142*53ee8cc1Swenshuai.xi     __l_amsb__(m, a9,b9);                   \
1143*53ee8cc1Swenshuai.xi     __l_amsb__(m, a10,b10);                 \
1144*53ee8cc1Swenshuai.xi     __l_amsb__(m, a11,b11);                 \
1145*53ee8cc1Swenshuai.xi     __l_amsb__(m, a12,b12);                 \
1146*53ee8cc1Swenshuai.xi     __l_amsb__(m, a13,b13);                 \
1147*53ee8cc1Swenshuai.xi     __l_amsb__(m, a14,b14);                 \
1148*53ee8cc1Swenshuai.xi     __l_amsb__(m, a15,b15);                 \
1149*53ee8cc1Swenshuai.xi     int _lo_; __l_amsb_rs__(_lo_, m,a16,b16, bits); _lo_; })
1150*53ee8cc1Swenshuai.xi 
/*
 * L_AMAC_RS_pppppppppppppppp_LO: sixteen operand pairs, all "p".
 * __l_amul__ on (a1,b1), __l_amac__ on (a2,b2)..(a15,b15), then
 * __l_amac_rs__ on (a16,b16), which fills the int _lo_ yielded by the
 * statement expression.  `m` and `bits` are forwarded unchanged to the
 * helpers (defined outside this section).
 */
1151*53ee8cc1Swenshuai.xi #define L_AMAC_RS_pppppppppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, \
1152*53ee8cc1Swenshuai.xi                             a9,b9, a10,b10, a11,b11, a12,b12, a13,b13, a14,b14, a15,b15, a16,b16, bits) \
1153*53ee8cc1Swenshuai.xi ({  __l_amul__(m, a1,b1);                   \
1154*53ee8cc1Swenshuai.xi     __l_amac__(m, a2,b2);                   \
1155*53ee8cc1Swenshuai.xi     __l_amac__(m, a3,b3);                   \
1156*53ee8cc1Swenshuai.xi     __l_amac__(m, a4,b4);                   \
1157*53ee8cc1Swenshuai.xi     __l_amac__(m, a5,b5);                   \
1158*53ee8cc1Swenshuai.xi     __l_amac__(m, a6,b6);                   \
1159*53ee8cc1Swenshuai.xi     __l_amac__(m, a7,b7);                   \
1160*53ee8cc1Swenshuai.xi     __l_amac__(m, a8,b8);                   \
1161*53ee8cc1Swenshuai.xi     __l_amac__(m, a9,b9);                   \
1162*53ee8cc1Swenshuai.xi     __l_amac__(m, a10,b10);                 \
1163*53ee8cc1Swenshuai.xi     __l_amac__(m, a11,b11);                 \
1164*53ee8cc1Swenshuai.xi     __l_amac__(m, a12,b12);                 \
1165*53ee8cc1Swenshuai.xi     __l_amac__(m, a13,b13);                 \
1166*53ee8cc1Swenshuai.xi     __l_amac__(m, a14,b14);                 \
1167*53ee8cc1Swenshuai.xi     __l_amac__(m, a15,b15);                 \
1168*53ee8cc1Swenshuai.xi     int _lo_; __l_amac_rs__(_lo_, m,a16,b16, bits); _lo_; })
1169*53ee8cc1Swenshuai.xi 
/*
 * Raw divide helpers.  Each expands to a bare { } block (a statement, not an
 * expression) that copies `a` and `b` into locals, so each argument is
 * evaluated once, and then emits a single volatile asm instruction with `lo`
 * as the output register and `bits` as a "K"-constraint operand.
 * __l_divl__ uses int locals and "l.divl"; __l_divlu__ uses unsigned locals
 * and "l.divlu".
 */
1170*53ee8cc1Swenshuai.xi #define __l_divl__(lo, a,b, bits)           \
1171*53ee8cc1Swenshuai.xi    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.divl\t%0,%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
1172*53ee8cc1Swenshuai.xi #define __l_divlu__(lo, a,b, bits)          \
1173*53ee8cc1Swenshuai.xi    { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.divlu\t%0,%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
1174*53ee8cc1Swenshuai.xi 
1175*53ee8cc1Swenshuai.xi /* Left-shift before divide */
/*
 * Expression wrappers around __l_divl__ / __l_divlu__: declare a local _lo_
 * (int for L_DIVL, unsigned for L_DIVLU), let the helper fill it, and yield
 * it as the value of the statement expression.
 */
1176*53ee8cc1Swenshuai.xi #define L_DIVL(a,b, bits)  ({ int _lo_; __l_divl__(_lo_, a,b, bits); _lo_; })
1177*53ee8cc1Swenshuai.xi #define L_DIVLU(a,b, bits)  ({ unsigned _lo_; __l_divlu__(_lo_, a,b, bits); _lo_; })
1178*53ee8cc1Swenshuai.xi 
1179*53ee8cc1Swenshuai.xi /* Arithmetic-Shift-Right & Clip to 16-bit */
1180*53ee8cc1Swenshuai.xi 
/*
 * __SRA_CLIP16(shift_right, t): arithmetic-shift `t` right by `shift_right`,
 * then clamp the result to [-32768, 32767].  Yields an int.
 *
 *   shift_right  shift count, passed to the asm as a "K"-constraint operand.
 *   t            value to shift and clip; evaluated once into a local int.
 *
 * The whole sequence is a single asm statement.  Each l.cmov consumes the
 * flag written by the l.sfgesi / l.sflesi just before it, and the compiler
 * does not promise to keep separate asm statements adjacent or in order, so
 * splitting the sequence could let other flag-writing code land between a
 * compare and its l.cmov.  The "cc" clobber records that the flag is
 * overwritten; `_u_` is early-clobber scratch for the two bounds.
 */
#define __SRA_CLIP16(shift_right, t)                                    \
({  int _u_, _t_ = (t);                                                 \
    __asm__ ("l.srai     %0,%0,%2\n\t"      /* _t_ >>= shift_right   */ \
             "l.sfgesi   %0,-32768\n\t"     /* flag = _t_ >= -32768  */ \
             "l.addi     %1,r0,-32768\n\t"  /* _u_ = lower bound     */ \
             "l.cmov     %0,%0,%1\n\t"      /* _t_ = flag ? _t_ : _u_ */ \
             "l.sflesi   %0,32767\n\t"      /* flag = _t_ <= 32767   */ \
             "l.addi     %1,r0,32767\n\t"   /* _u_ = upper bound     */ \
             "l.cmov     %0,%0,%1"          /* _t_ = flag ? _t_ : _u_ */ \
             : "+r"(_t_), "=&r"(_u_)                                    \
             : "K"(shift_right)                                         \
             : "cc");                                                   \
    _t_; })
1191*53ee8cc1Swenshuai.xi 
/*
 * __CLIP16(t): clamp `t` to [-32768, 32767].  Yields an int.
 *
 *   t  value to clip; evaluated once into a local int.
 *
 * The compare / load-bound / conditional-move sequence is a single asm
 * statement.  Each l.cmov consumes the flag written by the l.sfgesi /
 * l.sflesi just before it, and the compiler does not promise to keep
 * separate asm statements adjacent or in order.  The "cc" clobber records
 * that the flag is overwritten; `_u_` is early-clobber scratch for the two
 * bounds.
 */
#define __CLIP16(t)                                                     \
({  int _u_, _t_ = (t);                                                 \
    __asm__ ("l.sfgesi   %0,-32768\n\t"     /* flag = _t_ >= -32768  */ \
             "l.addi     %1,r0,-32768\n\t"  /* _u_ = lower bound     */ \
             "l.cmov     %0,%0,%1\n\t"      /* _t_ = flag ? _t_ : _u_ */ \
             "l.sflesi   %0,32767\n\t"      /* flag = _t_ <= 32767   */ \
             "l.addi     %1,r0,32767\n\t"   /* _u_ = upper bound     */ \
             "l.cmov     %0,%0,%1"          /* _t_ = flag ? _t_ : _u_ */ \
             : "+r"(_t_), "=&r"(_u_)                                    \
             :                                                          \
             : "cc");                                                   \
    _t_; })
1201*53ee8cc1Swenshuai.xi 
/*
 * __SRA_CLIP16_ex(shift_right, t): two-instruction variant.
 * Copies `t` into a local int, emits "l.srari" with `shift_right` as a
 * "K"-constraint operand, then feeds that result through "l.sat ...,16".
 * Yields the int _result_.
 * NOTE(review): presumably equivalent to __SRA_CLIP16 except for the
 * rounding implied by "srari" -- confirm against the ISA manual.
 */
1202*53ee8cc1Swenshuai.xi #define __SRA_CLIP16_ex(shift_right, t)     \
1203*53ee8cc1Swenshuai.xi ({  int _result_, _t_ = t;                  \
1204*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.srari\t%0,%1,%2" : "=r"(_result_) : "r"(_t_), "K"(shift_right) ); \
1205*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.sat\t%0,%1,16" : "=r"(_result_) : "r"(_result_) ); \
1206*53ee8cc1Swenshuai.xi     _result_;})
1207*53ee8cc1Swenshuai.xi 
/*
 * __CLIP16_ex(t): single-instruction variant.  Copies `t` into a local int
 * and emits "l.sat %0,%1,16"; yields the int _result_.
 */
1208*53ee8cc1Swenshuai.xi #define __CLIP16_ex(t)      \
1209*53ee8cc1Swenshuai.xi ({  int _result_, _t_ = t;  \
1210*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.sat\t%0,%1,16" : "=r"(_result_) : "r"(_t_) ); \
1211*53ee8cc1Swenshuai.xi     _result_;})
1212*53ee8cc1Swenshuai.xi 
1213*53ee8cc1Swenshuai.xi /* Extract, Deposit, and Saturate */
1214*53ee8cc1Swenshuai.xi 
/*
 * Extract wrappers whose `width` is a "K"-constraint operand.
 * Each copies `v` into a local int, emits one volatile asm instruction, and
 * yields the int _result_.  They differ in mnemonic and in how `bitpos` is
 * passed:
 *   L_EXT    l.ext    bitpos in a register
 *   L_EXTI   l.exti   bitpos as a "K" operand
 *   L_EXTU   l.extu   bitpos in a register
 *   L_EXTUI  l.extui  bitpos as a "K" operand
 * The U variants also hold their value and result in plain int.
 */
1215*53ee8cc1Swenshuai.xi #define L_EXT(v, width, bitpos)             \
1216*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _b_ = bitpos;    \
1217*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.ext\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "r"(_b_)); \
1218*53ee8cc1Swenshuai.xi     _result_;})
1219*53ee8cc1Swenshuai.xi 
1220*53ee8cc1Swenshuai.xi #define L_EXTI(v, width, bitpos)            \
1221*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1222*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.exti\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "K"(bitpos)); \
1223*53ee8cc1Swenshuai.xi     _result_;})
1224*53ee8cc1Swenshuai.xi 
1225*53ee8cc1Swenshuai.xi #define L_EXTU(v, width, bitpos)            \
1226*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _b_ = bitpos;    \
1227*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extu\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "r"(_b_)); \
1228*53ee8cc1Swenshuai.xi     _result_;})
1229*53ee8cc1Swenshuai.xi 
1230*53ee8cc1Swenshuai.xi #define L_EXTUI(v, width, bitpos)           \
1231*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1232*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extui\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "K"(bitpos)); \
1233*53ee8cc1Swenshuai.xi     _result_;})
1234*53ee8cc1Swenshuai.xi 
/*
 * Extract wrappers whose `width` is passed in a register.
 * Each copies `v` and `width` into local ints, emits one volatile asm
 * instruction, and yields the int _result_.  They differ in mnemonic and in
 * how `bitpos` is passed:
 *   L_EXTR    l.extr    bitpos in a register
 *   L_EXTRI   l.extri   bitpos as a "K" operand
 *   L_EXTRU   l.extru   bitpos in a register
 *   L_EXTRUI  l.extrui  bitpos as a "K" operand
 */
1235*53ee8cc1Swenshuai.xi #define L_EXTR(v, width, bitpos)            \
1236*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _width_ = width, _b_ = bitpos;    \
1237*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extr\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "r"(_b_)); \
1238*53ee8cc1Swenshuai.xi     _result_;})
1239*53ee8cc1Swenshuai.xi 
1240*53ee8cc1Swenshuai.xi #define L_EXTRI(v, width, bitpos)           \
1241*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _width_ = width; \
1242*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extri\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
1243*53ee8cc1Swenshuai.xi     _result_;})
1244*53ee8cc1Swenshuai.xi 
1245*53ee8cc1Swenshuai.xi #define L_EXTRU(v, width, bitpos)           \
1246*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _width_ = width, _b_ = bitpos; \
1247*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extru\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "r"(_b_)); \
1248*53ee8cc1Swenshuai.xi     _result_;})
1249*53ee8cc1Swenshuai.xi 
1250*53ee8cc1Swenshuai.xi #define L_EXTRUI(v, width, bitpos)          \
1251*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v, _width_ = width; \
1252*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.extrui\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
1253*53ee8cc1Swenshuai.xi     _result_;})
1254*53ee8cc1Swenshuai.xi 
/*
 * L_DEP(d, v, width, bitpos): emits "l.dep" with `d` as a read-write ("+r")
 * operand, so `d` must be a modifiable lvalue and is updated in place.
 * `v` and `bitpos` are copied into local ints and passed in registers;
 * `width` is a "K"-constraint operand.  Yields the updated `d`.
 */
1255*53ee8cc1Swenshuai.xi #define L_DEP(d, v, width, bitpos)          \
1256*53ee8cc1Swenshuai.xi ({  int _v_ = v, _b_ = bitpos;              \
1257*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.dep\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
1258*53ee8cc1Swenshuai.xi     d;})
1259*53ee8cc1Swenshuai.xi 
/*
 * L_DEPI(v, width, bitpos): emits "l.depi" with `width` and `bitpos` as
 * "K"-constraint operands and `v` copied into a local int.
 * NOTE(review): the body reads, writes and yields `d`, but `d` is not a
 * macro parameter, so it binds to whatever `d` is in scope at the expansion
 * site.  The sibling L_DEP and L_SATDEPI take `d` as their first parameter,
 * so this looks like a dropped parameter.  Adding it would change the call
 * signature, so it is left for the owner to confirm.
 */
1260*53ee8cc1Swenshuai.xi #define L_DEPI(v, width, bitpos)            \
1261*53ee8cc1Swenshuai.xi ({  int _v_ = v;                            \
1262*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.depi\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
1263*53ee8cc1Swenshuai.xi     d;})
1264*53ee8cc1Swenshuai.xi 
/*
 * L_DEPR(d, v, width, bitpos): emits "l.depr" with `d` as a read-write
 * ("+r") operand, so `d` must be a modifiable lvalue and is updated in
 * place.  `v`, `width` and `bitpos` are copied into local ints and passed
 * in registers.  Yields the updated `d`.
 */
1265*53ee8cc1Swenshuai.xi #define L_DEPR(d, v, width, bitpos)         \
1266*53ee8cc1Swenshuai.xi ({  int _v_ = v, _width_ = width, _b_ = bitpos; \
1267*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.depr\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "r"(_width_), "r"(_b_)); \
1268*53ee8cc1Swenshuai.xi     d;})
1269*53ee8cc1Swenshuai.xi 
/*
 * L_DEPRI(v, width, bitpos): emits "l.depri" with `v` and `width` copied
 * into local ints (register operands) and `bitpos` as a "K"-constraint
 * operand.
 * NOTE(review): the body reads, writes and yields `d`, but `d` is not a
 * macro parameter, so it binds to whatever `d` is in scope at the expansion
 * site.  The sibling L_DEPR takes `d` as its first parameter, so this looks
 * like a dropped parameter.  Adding it would change the call signature, so
 * it is left for the owner to confirm.
 */
1270*53ee8cc1Swenshuai.xi #define L_DEPRI(v, width, bitpos)           \
1271*53ee8cc1Swenshuai.xi ({  int _v_ = v, _width_ = width;           \
1272*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.depri\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
1273*53ee8cc1Swenshuai.xi     d;})
1274*53ee8cc1Swenshuai.xi 
/*
 * L_DEPOV(d): emits "l.depov" with `d` as its only operand, read-write
 * ("+r"), so `d` must be a modifiable lvalue.  Yields the updated `d`.
 */
1275*53ee8cc1Swenshuai.xi #define L_DEPOV(d)                          \
1276*53ee8cc1Swenshuai.xi ({  __asm__ volatile ("l.depov\t%0" : "+r"(d) : ); \
1277*53ee8cc1Swenshuai.xi     d;})
1278*53ee8cc1Swenshuai.xi 
/*
 * Saturate wrappers.  Each copies `v` into a local int, emits one volatile
 * asm instruction with `width` as a "K"-constraint operand, and yields the
 * int _result_.  Mnemonics: L_SAT -> l.sat, L_SATU -> l.satu,
 * L_SATSU -> l.satsu, L_SATUS -> l.satus.
 */
1279*53ee8cc1Swenshuai.xi #define L_SAT(v, width)                     \
1280*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1281*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.sat\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
1282*53ee8cc1Swenshuai.xi     _result_;})
1283*53ee8cc1Swenshuai.xi 
1284*53ee8cc1Swenshuai.xi #define L_SATU(v, width)                    \
1285*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1286*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satu\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
1287*53ee8cc1Swenshuai.xi     _result_;})
1288*53ee8cc1Swenshuai.xi 
1289*53ee8cc1Swenshuai.xi #define L_SATSU(v, width)                   \
1290*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1291*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satsu\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
1292*53ee8cc1Swenshuai.xi     _result_;})
1293*53ee8cc1Swenshuai.xi 
1294*53ee8cc1Swenshuai.xi #define L_SATUS(v, width)                   \
1295*53ee8cc1Swenshuai.xi ({  int _result_, _v_ = v;                  \
1296*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satus\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
1297*53ee8cc1Swenshuai.xi     _result_;})
1298*53ee8cc1Swenshuai.xi 
/*
 * Saturate-and-deposit wrappers.  `d` is a read-write ("+r") operand, so it
 * must be a modifiable lvalue; it is updated in place and yielded.  `v` is
 * copied into a local int and `width` is a "K"-constraint operand.  They
 * differ in mnemonic and in how `bitpos` is passed:
 *   L_SATDEP    l.satdep    bitpos in a register
 *   L_SATDEPI   l.satdepi   bitpos as a "K" operand
 *   L_SATDEPU   l.satdepu   bitpos in a register
 *   L_SATDEPUI  l.satdepui  bitpos as a "K" operand
 */
1299*53ee8cc1Swenshuai.xi #define L_SATDEP(d, v, width, bitpos)       \
1300*53ee8cc1Swenshuai.xi ({  int _v_ = v, _b_ = bitpos;    \
1301*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satdep\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
1302*53ee8cc1Swenshuai.xi     d;})
1303*53ee8cc1Swenshuai.xi 
1304*53ee8cc1Swenshuai.xi #define L_SATDEPI(d, v, width, bitpos)      \
1305*53ee8cc1Swenshuai.xi ({  int _v_ = v;                  \
1306*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satdepi\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
1307*53ee8cc1Swenshuai.xi     d;})
1308*53ee8cc1Swenshuai.xi 
1309*53ee8cc1Swenshuai.xi #define L_SATDEPU(d, v, width, bitpos)      \
1310*53ee8cc1Swenshuai.xi ({  int _v_ = v, _b_ = bitpos;    \
1311*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satdepu\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
1312*53ee8cc1Swenshuai.xi     d;})
1313*53ee8cc1Swenshuai.xi 
1314*53ee8cc1Swenshuai.xi #define L_SATDEPUI(d, v, width, bitpos)     \
1315*53ee8cc1Swenshuai.xi ({  int _v_ = v;                  \
1316*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.satdepui\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
1317*53ee8cc1Swenshuai.xi     d;})
1318*53ee8cc1Swenshuai.xi 
1319*53ee8cc1Swenshuai.xi /* FFT */
/*
 * __l_fft__(fft, n, re, im, mode): common body for the FFT wrappers.
 * `fft` must be a string literal; it is concatenated with the operand
 * template to form the instruction.  `re` and `im` are cast to void* and
 * passed in registers; `n` and `mode` are "K"-constraint operands.  Expands
 * to a bare { } block with no output operands.
 * NOTE(review): no "memory" clobber is declared although two pointers are
 * passed.  Confirm whether the compiler needs to be told that the buffers
 * are accessed.
 */
1320*53ee8cc1Swenshuai.xi #define __l_fft__(fft, n, re,im, mode)      \
1321*53ee8cc1Swenshuai.xi    { void *_re_ = (void*)(re), *_im_ = (void*)(im); __asm__ volatile (fft "\t%0,%1,%2,%3" :  : "K"(n), "r"(_re_), "r"(_im_), "K"(mode)); }
1322*53ee8cc1Swenshuai.xi 
/*
 * FFT entry points: each forwards its arguments to __l_fft__ with a fixed
 * mnemonic ("l.fft", "l.fftr", "l.ifft", "l.ifftr").  They expand to a
 * { } block, so they are statements and yield no value.
 */
1323*53ee8cc1Swenshuai.xi #define L_FFT(n, re,im, mode)       __l_fft__("l.fft", n, re,im, mode)
1324*53ee8cc1Swenshuai.xi #define L_FFTR(n, re,im, mode)      __l_fft__("l.fftr", n, re,im, mode)
1325*53ee8cc1Swenshuai.xi #define L_IFFT(n, re,im, mode)      __l_fft__("l.ifft", n, re,im, mode)
1326*53ee8cc1Swenshuai.xi #define L_IFFTR(n, re,im, mode)     __l_fft__("l.ifftr", n, re,im, mode)
/*
 * L_WAIT_FOR_FFT: busy-waits until L_MFSPR1(SPR_ACCEL_FFT) reads 0.
 * NOTE(review): the terminating ';' is part of the macro, so
 * "L_WAIT_FOR_FFT;" expands to two statements and cannot be the sole body
 * of an un-braced if/else.  Dropping it would break callers that rely on
 * the built-in semicolon.
 */
1327*53ee8cc1Swenshuai.xi #define L_WAIT_FOR_FFT  while(L_MFSPR1(SPR_ACCEL_FFT) != 0);
1328*53ee8cc1Swenshuai.xi 
1329*53ee8cc1Swenshuai.xi /* ABS/COUNT1/BYTESWAP/MAX/MIN */
/*
 * One-operand wrappers.  Each copies `a` into a local int (evaluated once),
 * emits one volatile asm instruction with a register input and a register
 * output, and yields the int _res_.  Mnemonics: L_ABS -> l.abs,
 * L_COUNT1 -> l.count1, L_BYTESWAP -> l.byteswap, L_BITSWAP -> l.bitswap.
 */
1330*53ee8cc1Swenshuai.xi #define L_ABS(a)                            \
1331*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1332*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.abs\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
1333*53ee8cc1Swenshuai.xi     _res_;})
1334*53ee8cc1Swenshuai.xi 
1335*53ee8cc1Swenshuai.xi #define L_COUNT1(a)                         \
1336*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1337*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.count1\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
1338*53ee8cc1Swenshuai.xi     _res_;})
1339*53ee8cc1Swenshuai.xi 
1340*53ee8cc1Swenshuai.xi #define L_BYTESWAP(a)                       \
1341*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1342*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.byteswap\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
1343*53ee8cc1Swenshuai.xi     _res_;})
1344*53ee8cc1Swenshuai.xi 
1345*53ee8cc1Swenshuai.xi #define L_BITSWAP(a)                        \
1346*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1347*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.bitswap\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
1348*53ee8cc1Swenshuai.xi     _res_;})
1349*53ee8cc1Swenshuai.xi 
/*
 * Two-register max/min wrappers.  Each copies `a` and `b` into local ints
 * (each evaluated once), emits one volatile asm instruction, and yields the
 * int _res_.  Mnemonics: L_MAX -> l.max, L_MAXU -> l.maxu, L_MIN -> l.min,
 * L_MINU -> l.minu.  The U variants also carry their operands and result in
 * plain int, so the caller sees a signed type.
 */
1350*53ee8cc1Swenshuai.xi #define L_MAX(a,b)                          \
1351*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1352*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.max\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1353*53ee8cc1Swenshuai.xi     _res_;})
1354*53ee8cc1Swenshuai.xi 
1355*53ee8cc1Swenshuai.xi #define L_MAXU(a,b)                         \
1356*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1357*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.maxu\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1358*53ee8cc1Swenshuai.xi     _res_;})
1359*53ee8cc1Swenshuai.xi 
1360*53ee8cc1Swenshuai.xi #define L_MIN(a,b)                          \
1361*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1362*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.min\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1363*53ee8cc1Swenshuai.xi     _res_;})
1364*53ee8cc1Swenshuai.xi 
1365*53ee8cc1Swenshuai.xi #define L_MINU(a,b)                         \
1366*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1367*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.minu\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1368*53ee8cc1Swenshuai.xi     _res_;})
1369*53ee8cc1Swenshuai.xi 
/*
 * Max/min against a constant.  Each copies `a` into a local int, passes `b`
 * as an "I"-constraint operand (unlike the "K" used elsewhere in this
 * section), emits one volatile asm instruction, and yields the int _res_.
 * Mnemonics: L_MAXI -> l.maxi, L_MAXUI -> l.maxui, L_MINI -> l.mini,
 * L_MINUI -> l.minui.
 */
1370*53ee8cc1Swenshuai.xi #define L_MAXI(a,b)                         \
1371*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1372*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.maxi\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
1373*53ee8cc1Swenshuai.xi     _res_;})
1374*53ee8cc1Swenshuai.xi 
1375*53ee8cc1Swenshuai.xi #define L_MAXUI(a,b)                        \
1376*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1377*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.maxui\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
1378*53ee8cc1Swenshuai.xi     _res_;})
1379*53ee8cc1Swenshuai.xi 
1380*53ee8cc1Swenshuai.xi #define L_MINI(a,b)                         \
1381*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1382*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.mini\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
1383*53ee8cc1Swenshuai.xi     _res_;})
1384*53ee8cc1Swenshuai.xi 
1385*53ee8cc1Swenshuai.xi #define L_MINUI(a,b)                        \
1386*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a;                     \
1387*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.minui\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
1388*53ee8cc1Swenshuai.xi     _res_;})
1389*53ee8cc1Swenshuai.xi 
1390*53ee8cc1Swenshuai.xi /* Bi-directional shift arithmetic/logical */
/*
 * Bi-directional shift wrappers.  Each copies `a` and `b` into local ints,
 * emits one volatile asm instruction (L_BSA -> l.bsa, L_BSL -> l.bsl) with
 * both as register inputs, and yields the int _res_.
 * NOTE(review): presumably the sign of `b` selects the shift direction;
 * confirm against the ISA manual.
 */
1391*53ee8cc1Swenshuai.xi #define L_BSA(a,b)                          \
1392*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1393*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.bsa\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1394*53ee8cc1Swenshuai.xi     _res_;})
1395*53ee8cc1Swenshuai.xi 
1396*53ee8cc1Swenshuai.xi #define L_BSL(a,b)                          \
1397*53ee8cc1Swenshuai.xi ({  int _res_, _a_ = a, _b_ = b;            \
1398*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.bsl\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
1399*53ee8cc1Swenshuai.xi     _res_;})
1400*53ee8cc1Swenshuai.xi 
1401*53ee8cc1Swenshuai.xi /* Double-shift */
/*
 * Double-shift wrappers with the count in a register.  `a`, `b` and `cnt`
 * are copied into local ints; the copy of `a` is the read-write ("+r")
 * operand, so the caller's `a` is not modified and the shifted value is
 * what the statement expression yields.  Mnemonics: L_DSL -> l.dsl,
 * L_DSR -> l.dsr.
 */
1402*53ee8cc1Swenshuai.xi #define L_DSL(a,b,cnt)                      \
1403*53ee8cc1Swenshuai.xi ({  int _a_ = a, _b_ = b, _cnt_ = cnt;      \
1404*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.dsl\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "r"(_cnt_)); \
1405*53ee8cc1Swenshuai.xi     _a_;})
1406*53ee8cc1Swenshuai.xi 
1407*53ee8cc1Swenshuai.xi #define L_DSR(a,b,cnt)                      \
1408*53ee8cc1Swenshuai.xi ({  int _a_ = a, _b_ = b, _cnt_ = cnt;      \
1409*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.dsr\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "r"(_cnt_)); \
1410*53ee8cc1Swenshuai.xi     _a_;})
1411*53ee8cc1Swenshuai.xi 
/*
 * Double-shift wrappers with the count as a "K"-constraint operand.  `a`
 * and `b` are copied into local ints; the copy of `a` is the read-write
 * ("+r") operand and is what the statement expression yields.
 * NOTE(review): the templates use the same "l.dsl" / "l.dsr" mnemonics as
 * the register-count L_DSL / L_DSR, although every other immediate wrapper
 * in this section uses an "...i" mnemonic.  Confirm that the assembler
 * accepts an immediate here, or whether "l.dsli" / "l.dsri" was intended.
 */
1412*53ee8cc1Swenshuai.xi #define L_DSLI(a,b,cnt)                     \
1413*53ee8cc1Swenshuai.xi ({  int _a_ = a, _b_ = b;                   \
1414*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.dsl\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "K"(cnt)); \
1415*53ee8cc1Swenshuai.xi     _a_;})
1416*53ee8cc1Swenshuai.xi 
1417*53ee8cc1Swenshuai.xi #define L_DSRI(a,b,cnt)                     \
1418*53ee8cc1Swenshuai.xi ({  int _a_ = a, _b_ = b;                   \
1419*53ee8cc1Swenshuai.xi     __asm__ volatile ("l.dsr\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "K"(cnt)); \
1420*53ee8cc1Swenshuai.xi     _a_;})
1421*53ee8cc1Swenshuai.xi 
1422*53ee8cc1Swenshuai.xi #endif /* __aeon__ */
1423*53ee8cc1Swenshuai.xi 
1424*53ee8cc1Swenshuai.xi #endif /* __SPR_DEFS_H */
1425