xref: /utopia/UTPA2-700.0.x/projects/tools/lint/aeon_include/machine/spr_defs.h (revision 53ee8cc121a030b8d368113ac3e966b4705770ef)
/*
 * Special-purpose register (SPR) definitions for the OR1K-style "aeon"
 * core: group base addresses, per-register addresses, bit-field masks,
 * and GCC inline-asm accessor macros.
 */
1 #ifndef __SPR_DEFS_H
2 #define __SPR_DEFS_H
3 
/* NOTE(review): force-enabled; was presumably conditional on __aeon__. */
4 #if 1 //def __aeon__
/* SPR address space: 32 groups of 2048 registers.  A 16-bit SPR address
 * carries the group index in bits [15:11] and the in-group offset in
 * bits [10:0]. */
5 #define MAX_GRPS (32)
6 #define MAX_SPRS_PER_GRP_BITS (11)
7 #define MAX_SPRS_PER_GRP (1 << MAX_SPRS_PER_GRP_BITS)
8 #define MAX_SPRS (0x10000)
/* Split an SPR address into its in-group offset / group base part. */
9 #define SPR_GRP_OFF(spr)    ((spr) & (MAX_SPRS_PER_GRP - 1))
10 #define SPR_GRP(spr)        ((spr)&(-MAX_SPRS_PER_GRP))
11 
12 /* Base addresses for the groups */
13 #define SPRGROUP_SYS	(0<< MAX_SPRS_PER_GRP_BITS)
14 #define SPRGROUP_DMMU	(1<< MAX_SPRS_PER_GRP_BITS)
15 #define SPRGROUP_IMMU	(2<< MAX_SPRS_PER_GRP_BITS)
16 #define SPRGROUP_DC	(3<< MAX_SPRS_PER_GRP_BITS)
17 #define SPRGROUP_IC	(4<< MAX_SPRS_PER_GRP_BITS)
18 #define SPRGROUP_MAC	(5<< MAX_SPRS_PER_GRP_BITS)
19 #define SPRGROUP_D	(6<< MAX_SPRS_PER_GRP_BITS)
20 #define SPRGROUP_PC	(7<< MAX_SPRS_PER_GRP_BITS)
21 #define SPRGROUP_PM	(8<< MAX_SPRS_PER_GRP_BITS)
22 #define SPRGROUP_PIC	(9<< MAX_SPRS_PER_GRP_BITS)
23 #define SPRGROUP_TT	(10<< MAX_SPRS_PER_GRP_BITS)
24 #define SPRGROUP_ACCEL	(11<< MAX_SPRS_PER_GRP_BITS)
25 
/* Alternate spellings of the group bases used by some callers. */
26 #define SPR_GROUP_SYS   SPRGROUP_SYS
27 #define SPR_GROUP_PM    SPRGROUP_PM
28 #define SPR_GROUP_PIC   SPRGROUP_PIC
29 #define SPR_GROUP_TT    SPRGROUP_TT
30 
31 /* System control and status group */
32 #define SPR_VR		(SPRGROUP_SYS + 0)
33 #define SPR_UPR		(SPRGROUP_SYS + 1)
34 #define SPR_CPUCFGR	(SPRGROUP_SYS + 2)
35 #define SPR_DMMUCFGR	(SPRGROUP_SYS + 3)
36 #define SPR_IMMUCFGR	(SPRGROUP_SYS + 4)
37 #define SPR_DCCFGR	(SPRGROUP_SYS + 5)
38 #define SPR_ICCFGR	(SPRGROUP_SYS + 6)
39 #define SPR_DCFGR	(SPRGROUP_SYS + 7)
40 #define SPR_PCCFGR	(SPRGROUP_SYS + 8)
/* NOTE(review): SPR_NPC and SPR_DRETURN share offset 16 -- presumably an
 * intentional alias on this core; confirm before relying on both names. */
41 #define SPR_NPC         (SPRGROUP_SYS + 16)  /* CZ 21/06/01 */
42 #define SPR_DRETURN	(SPRGROUP_SYS + 16)  /* CZ 21/06/01 */
43 #define SPR_SR		(SPRGROUP_SYS + 17)  /* CZ 21/06/01 */
44 #define SPR_PPC         (SPRGROUP_SYS + 18)  /* CZ 21/06/01 */
/* Sixteen exception PC / EA / SR save registers each. */
45 #define SPR_EPCR_BASE	(SPRGROUP_SYS + 32)  /* CZ 21/06/01 */
46 #define SPR_EPCR_LAST	(SPRGROUP_SYS + 47)  /* CZ 21/06/01 */
47 #define SPR_EEAR_BASE	(SPRGROUP_SYS + 48)
48 #define SPR_EEAR_LAST	(SPRGROUP_SYS + 63)
49 #define SPR_ESR_BASE	(SPRGROUP_SYS + 64)
50 #define SPR_ESR_LAST	(SPRGROUP_SYS + 79)
/* General-purpose register file window at SYS group offset 0x400. */
51 #define SPR_GPR(x)	(SPRGROUP_SYS + 0x400 + (x))
52 
/* In-group (11-bit) offsets of selected system registers. */
53 #define SPR_SYS_SR			SPR_GRP_OFF(SPR_SR)
54 #define SPR_SYS_ESR_BASE	SPR_GRP_OFF(SPR_ESR_BASE)
55 #define SPR_SYS_ESR_LAST	SPR_GRP_OFF(SPR_ESR_LAST)
56 
57 /* Data MMU group */
58 #define SPR_DMMUCR	(SPRGROUP_DMMU + 0)
/* Per-way TLB match/translate register ranges: each WAY occupies a
 * 0x100-register slice starting at group offset 0x200. */
59 #define SPR_DTLBMR_BASE(WAY)	(SPRGROUP_DMMU + 0x200 + (WAY) * 0x100)
60 #define SPR_DTLBMR_LAST(WAY)	(SPRGROUP_DMMU + 0x27f + (WAY) * 0x100)
61 #define SPR_DTLBTR_BASE(WAY)	(SPRGROUP_DMMU + 0x280 + (WAY) * 0x100)
62 #define SPR_DTLBTR_LAST(WAY)	(SPRGROUP_DMMU + 0x2ff + (WAY) * 0x100)
63 
64 /* Instruction MMU group (mirrors the DMMU layout) */
65 #define SPR_IMMUCR	(SPRGROUP_IMMU + 0)
66 #define SPR_ITLBMR_BASE(WAY)	(SPRGROUP_IMMU + 0x200 + (WAY) * 0x100)
67 #define SPR_ITLBMR_LAST(WAY)	(SPRGROUP_IMMU + 0x27f + (WAY) * 0x100)
68 #define SPR_ITLBTR_BASE(WAY)	(SPRGROUP_IMMU + 0x280 + (WAY) * 0x100)
69 #define SPR_ITLBTR_LAST(WAY)	(SPRGROUP_IMMU + 0x2ff + (WAY) * 0x100)
70 
71 /* Data cache group */
72 #define SPR_DCCR	(SPRGROUP_DC + 0)
73 #define SPR_DCBPR	(SPRGROUP_DC + 1)
74 #define SPR_DCBFR	(SPRGROUP_DC + 2)
75 #define SPR_DCBIR	(SPRGROUP_DC + 3)
76 #define SPR_DCBWR	(SPRGROUP_DC + 4)
77 #define SPR_DCBLR	(SPRGROUP_DC + 5)
78 #define SPR_DCR_BASE(WAY)	(SPRGROUP_DC + 0x200 + (WAY) * 0x200)
79 #define SPR_DCR_LAST(WAY)	(SPRGROUP_DC + 0x3ff + (WAY) * 0x200)
80 
81 /* Instruction cache group */
82 #define SPR_ICCR	(SPRGROUP_IC + 0)
83 #define SPR_ICBPR	(SPRGROUP_IC + 1)
84 #define SPR_ICBIR	(SPRGROUP_IC + 2)
85 #define SPR_ICBLR	(SPRGROUP_IC + 3)
86 #define SPR_ICR_BASE(WAY)	(SPRGROUP_IC + 0x200 + (WAY) * 0x200)
87 #define SPR_ICR_LAST(WAY)	(SPRGROUP_IC + 0x3ff + (WAY) * 0x200)
88 
89 /* MAC group */
90 #define SPR_MACLO 	  (SPRGROUP_MAC + 1)
91 #define SPR_MACHI 	  (SPRGROUP_MAC + 2)
/* High word of the most recent 32x32 l.mul/l.mulu (see L_MULL below). */
92 #define SPR_MACHI2    (SPRGROUP_MAC + 3)
93 
/* Pipelined ("a"-prefixed) MAC unit: multiply-high word, per-accumulator
 * lo/hi words, configuration and status registers. */
94 #define SPR_AMULHI	  (SPRGROUP_MAC + 8)
95 #define SPR_AMACLO0	  (SPRGROUP_MAC + 9)
96 #define SPR_AMACHI0	  (SPRGROUP_MAC + 10)
97 #define SPR_AMACLO1	  (SPRGROUP_MAC + 11)
98 #define SPR_AMACHI1	  (SPRGROUP_MAC + 12)
99 #define SPR_AMACCFG   (SPRGROUP_MAC + 13)
100 #define SPR_AMACSTATUS  (SPRGROUP_MAC + 14)
101 
/* Guard bits of the two accumulators (presumably the top bits of the
 * 72-bit MACs referenced below -- confirm width against the core spec). */
102 #define SPR_AMACGUARD0 (SPRGROUP_MAC + 5)
103 #define SPR_AMACGUARD1 (SPRGROUP_MAC + 6)
104 
/* Streaming-operand queue 0: queue register plus packed address-offset /
 * stride views (see MK_QADDR_STRIDE and qaddr_stride_t below). */
105 #define SPR_AMACQ0					(SPRGROUP_MAC + 16)
106 #define SPR_AMACQ0_ADDROFS			(SPRGROUP_MAC + 17)
107 #define SPR_AMACQ0_STRIDE			(SPRGROUP_MAC + 18)
108 #define SPR_AMACQ0_STRIDE2			(SPRGROUP_MAC + 19)
109 #define SPR_AMACQ0_ADDROFS_STRIDE	(SPRGROUP_MAC + 20)
110 
/* Streaming-operand queue 1 (same layout as queue 0). */
111 #define SPR_AMACQ1					(SPRGROUP_MAC + 24)
112 #define SPR_AMACQ1_ADDROFS			(SPRGROUP_MAC + 25)
113 #define SPR_AMACQ1_STRIDE			(SPRGROUP_MAC + 26)
114 #define SPR_AMACQ1_STRIDE2			(SPRGROUP_MAC + 27)
115 #define SPR_AMACQ1_ADDROFS_STRIDE	(SPRGROUP_MAC + 28)
116 
/* Pack (addr_offset, stride, stride2) into one 32-bit word:
 * addr_offset in bits [31:16], stride2 in bits [15:8], stride in
 * bits [7:0] -- matching the qaddr_stride_t bit-field overlay below. */
117 #define AMACQSTRIDE_BITS	8
118 #define MK_QADDR_STRIDE(addr_offset, stride, stride2)    \
119     ( ((unsigned)(addr_offset)<<(AMACQSTRIDE_BITS+AMACQSTRIDE_BITS)) |  \
120       (((unsigned)(stride2) & ((1<<AMACQSTRIDE_BITS)-1)) << AMACQSTRIDE_BITS) | \
121       ((unsigned)(stride) & ((1<<AMACQSTRIDE_BITS)-1)) )
122 
/* Structured overlay of the packed queue address/stride word produced by
 * MK_QADDR_STRIDE(): v is the raw 32-bit value, the anonymous bit-field
 * struct names its pieces.  NOTE(review): the field order flips under
 * HAS_EXECUTION && WORDS_BIGENDIAN, presumably so the overlay matches the
 * target bit layout on big-endian simulation hosts -- confirm. */
123 typedef struct
124 {
125 	union
126 	{
127 		struct
128 		{
129 #if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
130 			unsigned addr_offset:32-2*AMACQSTRIDE_BITS;
131 			int stride2:AMACQSTRIDE_BITS;	// need to multiply by sizeof(item)
132 			int stride:AMACQSTRIDE_BITS;	// need to multiply by sizeof(item)
133 #else
134 			int stride:AMACQSTRIDE_BITS;	// signed; need to multiply by sizeof(item)
135 			int stride2:AMACQSTRIDE_BITS;	// signed; need to multiply by sizeof(item)
136 			unsigned addr_offset:32-2*AMACQSTRIDE_BITS;
137 #endif
138 		};
139 		unsigned v;	/* raw 32-bit view of the packed fields */
140 	};
141 } qaddr_stride_t;
142 
/* Rounding mode applied when an accumulator is read back with a shift
 * (used by the a0_round_mode / a1_round_mode fields of amac_cfg_t). */
143 typedef enum
144 {
145     amac_round_to_infinity = 0,
146     amac_round_away_from_zero = 1
147 } amac_round_mode_e;
148 
/* Saturation mode applied when an accumulator is read back (used by the
 * a0_saturate_mode / a1_saturate_mode fields of amac_cfg_t).  Note that
 * "dont_care" and "to_64bit_before_shift" deliberately share encoding 0. */
149 typedef enum
150 {
151     amac_saturate_dont_care = 0,
152     amac_saturate_to_64bit_before_shift = 0,
153     amac_saturate_to_64bit_after_shift = 1,
154     amac_saturate_to_32bit_after_shift = 2,
155     amac_saturate_to_16bit_after_shift = 3
156 } amac_saturate_mode_e;
157 
/* Structured overlay of the SPR_AMACCFG register: per-accumulator round
 * and saturate modes (amac_round_mode_e / amac_saturate_mode_e values),
 * an fmode bit, and a global enable.  v is the raw 32-bit value; the
 * big-endian branch lists the same fields in reverse order (see the
 * layout note on qaddr_stride_t). */
158 typedef struct
159 {
160 	union
161 	{
162 		struct
163 		{
164 #if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
165             unsigned __resv4:13;
166             unsigned a1_round_mode:1;
167             unsigned a1_saturate_mode:2;
168             unsigned __resv3:5;
169             unsigned a0_round_mode:1;
170             unsigned a0_saturate_mode:2;
171             unsigned __resv2:5;
172             unsigned fmode:1;
173             unsigned __resv1:1;
174 			unsigned enabled:1;
175 #else
176 			unsigned enabled:1;
177             unsigned __resv1:1;
178             unsigned fmode:1;
179             unsigned __resv2:5;
180             unsigned a0_saturate_mode:2;
181             unsigned a0_round_mode:1;
182             unsigned __resv3:5;
183             unsigned a1_saturate_mode:2;
184             unsigned a1_round_mode:1;
185             unsigned __resv4:13;
186 #endif
187 		};
188 		unsigned v;	/* raw 32-bit register value */
189 	};
190 } amac_cfg_t;
191 
/* Structured overlay of the SPR_AMACSTATUS register.  Only bit 0 (ov,
 * the overflow flag) is defined; the rest is reserved.  v is the raw
 * 32-bit value. */
192 typedef struct
193 {
194 	union
195 	{
196 		struct
197 		{
198 #if defined(HAS_EXECUTION) && defined(WORDS_BIGENDIAN)
199             unsigned __resv1:31;
200 			unsigned ov:1;
201 #else
202 			unsigned ov:1;	/* overflow flag */
203             unsigned __resv1:31;
204 #endif
205 		};
206 		unsigned v;	/* raw 32-bit register value */
207 	};
208 } amac_status_t;
209 
210 /* Debug group */
/* N selects one of the debug value/compare register pairs. */
211 #define SPR_DVR(N)	(SPRGROUP_D + (N))
212 #define SPR_DCR(N)	(SPRGROUP_D + 8 + (N))
213 #define SPR_DMR1	(SPRGROUP_D + 16)
214 #define SPR_DMR2	(SPRGROUP_D + 17)
215 #define SPR_DWCR0	(SPRGROUP_D + 18)
216 #define SPR_DWCR1	(SPRGROUP_D + 19)
217 #define SPR_DSR		(SPRGROUP_D + 20)
218 #define SPR_DRR		(SPRGROUP_D + 21)
219 #define SPR_DREASON	(SPRGROUP_D + 22)
220 
221 /* Performance counters group */
222 #define SPR_PCCR(N)	(SPRGROUP_PC + (N))
223 #define SPR_PCMR(N)	(SPRGROUP_PC + 8 + (N))
224 
225 /* Power management group */
226 #define SPR_PMR (SPRGROUP_PM + 0)
227 
/* In-group (11-bit) offset of the PM register. */
228 #define SPR_PM_PMR  SPR_GRP_OFF(SPR_PMR)
229 
230 /* PIC group */
231 #define SPR_PICMR (SPRGROUP_PIC + 0)
232 #define SPR_PICPR (SPRGROUP_PIC + 1)
233 #define SPR_PICSR (SPRGROUP_PIC + 2)
234 
235 /* Tick Timer group */
236 #define SPR_TTMR (SPRGROUP_TT + 0)
237 #define SPR_TTCR (SPRGROUP_TT + 1)
238 #define SPR_TTUR (SPRGROUP_TT + 31)
239 
/* In-group (11-bit) offsets of the tick-timer registers. */
240 #define SPR_TT_TTMR	    SPR_GRP_OFF(SPR_TTMR)
241 #define SPR_TT_TTCR		SPR_GRP_OFF(SPR_TTCR)
242 
243 /* Accelerator group */
244 #define SPR_ACCEL_FFT		(SPRGROUP_ACCEL + 0)
245 #define SPR_ACCEL_HW_LOOP_COUNTER0	(SPRGROUP_ACCEL + 8)
246 #define SPR_ACCEL_HW_LOOP_COUNTER1	(SPRGROUP_ACCEL + 9)
247 #define SPR_ACCEL_HW_LOOP_COUNTER2	(SPRGROUP_ACCEL + 10)
248 #define SPR_ACCEL_HW_LOOP_COUNTER3	(SPRGROUP_ACCEL + 11)
249 
/* FFT accelerator addressing modes. */
250 #define FFT_ADDR_MODE_1_1       0
251 #define FFT_ADDR_MODE_2_2       1
252 
/* Structured overlay of the FFT accelerator status word (presumably read
 * via SPR_ACCEL_FFT -- confirm against the accelerator spec).  Only bit 0
 * (busy) is defined; v is the raw 32-bit value. */
253 typedef struct
254 {
255 	union
256 	{
257 		struct
258 		{
259 			unsigned busy:1;	/* accelerator busy flag */
260 			unsigned __resv1:31;
261 		};
262 		unsigned v;	/* raw 32-bit register value */
263 	};
264 } fft_status_t;
265 
266 /*
267  * Bit definitions for the Version Register
268  *
269  */
/* NOTE(review): SPR_VR_VER (0xffff0000) overlaps SPR_VR_CFG (0x00ff0000);
 * upstream or1ksim defines VER as 0xff000000.  Confirm that the wider VER
 * mask is intentional on this core. */
270 #define SPR_VR_VER	0xffff0000  /* Processor version */
271 #define SPR_VR_CFG	0x00ff0000  /* Processor configuration */
272 #define SPR_VR_REV	0x0000003f  /* Processor revision */
273 
274 /*
275  * Bit definitions for the Unit Present Register
276  *
277  */
278 #define SPR_UPR_UP	0x00000001  /* UPR present */
279 #define SPR_UPR_DCP	0x00000002  /* Data cache present */
280 #define SPR_UPR_ICP	0x00000004  /* Instruction cache present */
281 #define SPR_UPR_DMP	0x00000008  /* Data MMU present */
282 #define SPR_UPR_IMP	0x00000010  /* Instruction MMU present */
283 #define SPR_UPR_MP  0x00000020  /* MAC Present */
284 #define SPR_UPR_DUP	0x00000040  /* Debug unit present */
285 #define SPR_UPR_PCUP	0x00000080  /* Performance counters unit present */
286 #define SPR_UPR_PMP	0x00000100  /* Power management present */
287 #define SPR_UPR_PICP	0x00000200  /* PIC present */
288 #define SPR_UPR_TTP	0x00000400  /* Tick timer present */
289 #define SPR_UPR_CUST	0xff000000  /* Custom units */
290 
291 /*
292  * Bit definitions for the Supervision Register
293  *
294  */
295 #define SPR_SR_CID      0xf0000000  /* Context ID */
296 #define SPR_SR_SUMRA    0x00010000  /* Supervisor SPR read access */
297 #define SPR_SR_FO       0x00008000  /* Fixed one */
298 #define SPR_SR_EPH      0x00004000  /* Exception Prefix High */
299 #define SPR_SR_DSX      0x00002000  /* Delay Slot Exception */
300 #define SPR_SR_OVE      0x00001000  /* Overflow flag Exception */
301 #define SPR_SR_OV       0x00000800  /* Overflow flag */
302 #define SPR_SR_CY       0x00000400  /* Carry flag */
303 #define SPR_SR_F        0x00000200  /* Condition Flag */
304 #define SPR_SR_CE       0x00000100  /* CID Enable */
305 #define SPR_SR_LEE      0x00000080  /* Little Endian Enable */
306 #define SPR_SR_IME      0x00000040  /* Instruction MMU Enable */
307 #define SPR_SR_DME      0x00000020  /* Data MMU Enable */
308 #define SPR_SR_ICE      0x00000010  /* Instruction Cache Enable */
309 #define SPR_SR_DCE      0x00000008  /* Data Cache Enable */
310 #define SPR_SR_IEE      0x00000004  /* Interrupt Exception Enable */
311 #define SPR_SR_TEE      0x00000002  /* Tick timer Exception Enable */
312 #define SPR_SR_SM       0x00000001  /* Supervisor Mode */
313 
314 /*
315  * Bit definitions for the Data MMU Control Register
316  *
317  */
318 #define SPR_DMMUCR_P2S	0x0000003e  /* Level 2 Page Size */
319 #define SPR_DMMUCR_P1S	0x000007c0  /* Level 1 Page Size */
320 #define SPR_DMMUCR_VADDR_WIDTH	0x0000f800  /* Virtual ADDR Width */
321 #define SPR_DMMUCR_PADDR_WIDTH	0x000f0000  /* Physical ADDR Width */
322 
323 /*
324  * Bit definitions for the Instruction MMU Control Register
325  *
326  */
327 #define SPR_IMMUCR_P2S	0x0000003e  /* Level 2 Page Size */
328 #define SPR_IMMUCR_P1S	0x000007c0  /* Level 1 Page Size */
329 #define SPR_IMMUCR_VADDR_WIDTH	0x0000f800  /* Virtual ADDR Width */
330 #define SPR_IMMUCR_PADDR_WIDTH	0x000f0000  /* Physical ADDR Width */
331 
332 /*
333  * Bit definitions for the Data TLB Match Register
334  *
335  */
336 #define SPR_DTLBMR_V	0x00000001  /* Valid */
337 #define SPR_DTLBMR_PL1	0x00000002  /* Page Level 1 (if 0 then PL2) */
338 #define SPR_DTLBMR_CID	0x0000003c  /* Context ID */
339 #define SPR_DTLBMR_LRU	0x000000c0  /* Least Recently Used */
340 #define SPR_DTLBMR_VPN	0xfffff000  /* Virtual Page Number */
341 
342 /*
343  * Bit definitions for the Data TLB Translate Register
344  *
345  */
346 #define SPR_DTLBTR_CC	0x00000001  /* Cache Coherency */
347 #define SPR_DTLBTR_CI	0x00000002  /* Cache Inhibit */
348 #define SPR_DTLBTR_WBC	0x00000004  /* Write-Back Cache */
349 #define SPR_DTLBTR_WOM	0x00000008  /* Weakly-Ordered Memory */
350 #define SPR_DTLBTR_A	0x00000010  /* Accessed */
351 #define SPR_DTLBTR_D	0x00000020  /* Dirty */
352 #define SPR_DTLBTR_URE	0x00000040  /* User Read Enable */
353 #define SPR_DTLBTR_UWE	0x00000080  /* User Write Enable */
354 #define SPR_DTLBTR_SRE	0x00000100  /* Supervisor Read Enable */
355 #define SPR_DTLBTR_SWE	0x00000200  /* Supervisor Write Enable */
356 #define SPR_DTLBTR_PPN	0xfffff000  /* Physical Page Number */
357 
358 /*
359  * Bit definitions for the Instruction TLB Match Register
360  *
361  */
362 #define SPR_ITLBMR_V	0x00000001  /* Valid */
363 #define SPR_ITLBMR_PL1	0x00000002  /* Page Level 1 (if 0 then PL2) */
364 #define SPR_ITLBMR_CID	0x0000003c  /* Context ID */
365 #define SPR_ITLBMR_LRU	0x000000c0  /* Least Recently Used */
366 #define SPR_ITLBMR_VPN	0xfffff000  /* Virtual Page Number */
367 
368 /*
369  * Bit definitions for the Instruction TLB Translate Register
370  *
371  */
372 #define SPR_ITLBTR_CC	0x00000001  /* Cache Coherency */
373 #define SPR_ITLBTR_CI	0x00000002  /* Cache Inhibit */
374 #define SPR_ITLBTR_WBC	0x00000004  /* Write-Back Cache */
375 #define SPR_ITLBTR_WOM	0x00000008  /* Weakly-Ordered Memory */
376 #define SPR_ITLBTR_A	0x00000010  /* Accessed */
377 #define SPR_ITLBTR_D	0x00000020  /* Dirty */
378 #define SPR_ITLBTR_SXE	0x00000040  /* Supervisor eXecute Enable */
379 #define SPR_ITLBTR_UXE	0x00000080  /* User eXecute Enable */
380 #define SPR_ITLBTR_PPN	0xfffff000  /* Physical Page Number */
381 
382 /*
383  * Bit definitions for Data Cache Control register
384  *
385  */
386 #define SPR_DCCR_EW	0x000000ff  /* Enable ways */
387 
388 /*
389  * Bit definitions for Insn Cache Control register
390  *
391  */
392 #define SPR_ICCR_EW	0x000000ff  /* Enable ways */
393 
394 /*
395  * Bit definitions for Debug Control register
396  *
397  */
/* NOTE(review): force-selects the 3-bit NDP variant; the #else branch
 * keeps the wider 4-bit layout for reference. */
398 #if 1
399 #define SPR_DCFGR_NDP	0x00000007
400 #define SPR_DCFGR_WPCI	0x00000008
401 #else
402 #define SPR_DCFGR_NDP   0x0000000f
403 #define SPR_DCFGR_WPCI  0x00000010
404 #endif
405 
406 
407 /*
408  * Bit definitions for Data Cache Configuration Register
409  *
410  */
411 
412 #define SPR_DCCFGR_NCW		0x00000007
413 #define SPR_DCCFGR_NCS		0x00000078
414 #define SPR_DCCFGR_CBS		0x00000080
415 #define SPR_DCCFGR_CWS		0x00000100
416 #define SPR_DCCFGR_CCRI		0x00000200
417 #define SPR_DCCFGR_CBIRI	0x00000400
418 #define SPR_DCCFGR_CBPRI	0x00000800
419 #define SPR_DCCFGR_CBLRI	0x00001000
420 #define SPR_DCCFGR_CBFRI	0x00002000
421 #define SPR_DCCFGR_CBWBRI	0x00004000
422 
423 /*
424  * Bit definitions for Instruction Cache Configuration Register
425  *
426  */
427 #define SPR_ICCFGR_NCW		0x00000007
428 #define SPR_ICCFGR_NCS		0x00000078
429 #define SPR_ICCFGR_CBS		0x00000080
430 #define SPR_ICCFGR_CCRI		0x00000200
431 #define SPR_ICCFGR_CBIRI	0x00000400
432 #define SPR_ICCFGR_CBPRI	0x00000800
433 #define SPR_ICCFGR_CBLRI	0x00001000
434 
435 /*
436  * Bit definitions for Data MMU Configuration Register
437  *
438  */
439 
440 #define SPR_DMMUCFGR_NTW	0x00000003
441 #define SPR_DMMUCFGR_NTS	0x0000001C
442 #define SPR_DMMUCFGR_NAE	0x000000E0
443 #define SPR_DMMUCFGR_CRI	0x00000100
444 #define SPR_DMMUCFGR_PRI        0x00000200
445 #define SPR_DMMUCFGR_TEIRI	0x00000400
446 #define SPR_DMMUCFGR_HTR	0x00000800
447 
448 /*
449  * Bit definitions for Instruction MMU Configuration Register
450  *
451  */
452 
453 #define SPR_IMMUCFGR_NTW	0x00000003
454 #define SPR_IMMUCFGR_NTS	0x0000001C
455 #define SPR_IMMUCFGR_NAE	0x000000E0
456 #define SPR_IMMUCFGR_CRI	0x00000100
457 #define SPR_IMMUCFGR_PRI	0x00000200
458 #define SPR_IMMUCFGR_TEIRI	0x00000400
459 #define SPR_IMMUCFGR_HTR	0x00000800
460 
461 /*
462  * Bit definitions for Debug Control registers
463  *
464  */
465 #define SPR_DCR_DP	0x00000001  /* DVR/DCR present */
466 #define SPR_DCR_CC	0x0000000e  /* Compare condition */
467 #define SPR_DCR_SC	0x00000010  /* Signed compare */
468 #define SPR_DCR_CT	0x000000e0  /* Compare to */
469 
470 /* Bit results with SPR_DCR_CC mask */
471 #define SPR_DCR_CC_MASKED 0x00000000
472 #define SPR_DCR_CC_EQUAL  0x00000002
473 #define SPR_DCR_CC_LESS   0x00000004
474 #define SPR_DCR_CC_LESSE  0x00000006
475 #define SPR_DCR_CC_GREAT  0x00000008
476 #define SPR_DCR_CC_GREATE 0x0000000a
477 #define SPR_DCR_CC_NEQUAL 0x0000000c
478 
479 /* Bit results with SPR_DCR_CT mask */
480 #define SPR_DCR_CT_DISABLED 0x00000000
481 #define SPR_DCR_CT_IFEA     0x00000020
482 #define SPR_DCR_CT_LEA      0x00000040
483 #define SPR_DCR_CT_SEA      0x00000060
484 #define SPR_DCR_CT_LD       0x00000080
485 #define SPR_DCR_CT_SD       0x000000a0
486 #define SPR_DCR_CT_LSEA     0x000000c0
487 #define SPR_DCR_CT_LSD		0x000000e0
488 /* SPR_DCR_CT_LSD doesn't seem to be implemented anywhere in or1ksim. 2004-1-30 HP */
489 
490 /*
491  * Bit definitions for Debug Mode 1 register
492  *
493  */
494 #define SPR_DMR1_CW0	0x00000003  /* Chain watchpoint 0 */
495 #define SPR_DMR1_CW1	0x0000000c  /* Chain watchpoint 1 */
496 #define SPR_DMR1_CW2	0x00000030  /* Chain watchpoint 2 */
497 #define SPR_DMR1_CW3	0x000000c0  /* Chain watchpoint 3 */
498 #define SPR_DMR1_CW4	0x00000300  /* Chain watchpoint 4 */
499 #define SPR_DMR1_CW5	0x00000c00  /* Chain watchpoint 5 */
500 #define SPR_DMR1_CW6	0x00003000  /* Chain watchpoint 6 */
501 #define SPR_DMR1_CW7	0x0000c000  /* Chain watchpoint 7 */
502 #define SPR_DMR1_CW8	0x00030000  /* Chain watchpoint 8 */
503 #define SPR_DMR1_CW9	0x000c0000  /* Chain watchpoint 9 */
504 #define SPR_DMR1_CW10	0x00300000  /* Chain watchpoint 10 */
505 #define SPR_DMR1_ST	0x00400000  /* Single-step trace*/
506 #define SPR_DMR1_BT	0x00800000  /* Branch trace */
507 #define SPR_DMR1_DXFW	0x01000000  /* Disable external force watchpoint */
508 
509 /*
510  * Bit definitions for Debug Mode 2 register
511  *
512  */
513 #define SPR_DMR2_WCE0	0x00000001  /* Watchpoint counter 0 enable */
514 #define SPR_DMR2_WCE1	0x00000002  /* Watchpoint counter 1 enable */
515 #define SPR_DMR2_AWTC	0x00000ffc  /* Assign watchpoints to counters */
516 #define SPR_DMR2_WGB	0x003ff000  /* Watchpoints generating breakpoint */
517 #define SPR_DMR2_WBS	0xffc00000  /* Watchpoints generated breakpoint */
518 
519 /*
520  * Bit definitions for Debug watchpoint counter registers
521  *
522  */
523 #define SPR_DWCR_COUNT	0x0000ffff  /* Count */
524 #define SPR_DWCR_MATCH	0xffff0000  /* Match */
525 
526 /*
527  * Bit definitions for Debug stop register
528  *
529  */
530 #define SPR_DSR_RSTE	0x00000001  /* Reset exception */
531 #define SPR_DSR_BUSEE	0x00000002  /* Bus error exception */
532 #define SPR_DSR_DPFE	0x00000004  /* Data Page Fault exception */
533 #define SPR_DSR_IPFE	0x00000008  /* Insn Page Fault exception */
534 #define SPR_DSR_TTE	0x00000010  /* Tick Timer exception */
535 #define SPR_DSR_AE	0x00000020  /* Alignment exception */
536 #define SPR_DSR_IIE	0x00000040  /* Illegal Instruction exception */
537 #define SPR_DSR_IE	0x00000080  /* Interrupt exception */
538 #define SPR_DSR_DME	0x00000100  /* DTLB miss exception */
539 #define SPR_DSR_IME	0x00000200  /* ITLB miss exception */
540 #define SPR_DSR_RE	0x00000400  /* Range exception */
541 #define SPR_DSR_SCE	0x00000800  /* System call exception */
542 #define SPR_DSR_FPE     0x00001000  /* Floating Point Exception */
543 #define SPR_DSR_TE	0x00002000  /* Trap exception */
544 
545 /*
546  * Bit definitions for Debug reason register
547  * (same bit positions as the Debug stop register above)
548  */
549 #define SPR_DRR_RSTE	0x00000001  /* Reset exception */
550 #define SPR_DRR_BUSEE	0x00000002  /* Bus error exception */
551 #define SPR_DRR_DPFE	0x00000004  /* Data Page Fault exception */
552 #define SPR_DRR_IPFE	0x00000008  /* Insn Page Fault exception */
553 #define SPR_DRR_TTE	0x00000010  /* Tick Timer exception */
554 #define SPR_DRR_AE	0x00000020  /* Alignment exception */
555 #define SPR_DRR_IIE	0x00000040  /* Illegal Instruction exception */
556 #define SPR_DRR_IE	0x00000080  /* Interrupt exception */
557 #define SPR_DRR_DME	0x00000100  /* DTLB miss exception */
558 #define SPR_DRR_IME	0x00000200  /* ITLB miss exception */
559 #define SPR_DRR_RE	0x00000400  /* Range exception */
560 #define SPR_DRR_SCE	0x00000800  /* System call exception */
561 #define SPR_DRR_FPE     0x00001000  /* Floating Point Exception */
562 #define SPR_DRR_TE	0x00002000  /* Trap exception */
563 
564 /*
565  * Bit definitions for Performance counters mode registers
566  *
567  */
568 #define SPR_PCMR_CP	0x00000001  /* Counter present */
569 #define SPR_PCMR_UMRA	0x00000002  /* User mode read access */
570 #define SPR_PCMR_CISM	0x00000004  /* Count in supervisor mode */
571 #define SPR_PCMR_CIUM	0x00000008  /* Count in user mode */
572 #define SPR_PCMR_LA	0x00000010  /* Load access event */
573 #define SPR_PCMR_SA	0x00000020  /* Store access event */
574 #define SPR_PCMR_IF	0x00000040  /* Instruction fetch event*/
575 #define SPR_PCMR_DCM	0x00000080  /* Data cache miss event */
576 #define SPR_PCMR_ICM	0x00000100  /* Insn cache miss event */
577 #define SPR_PCMR_IFS	0x00000200  /* Insn fetch stall event */
578 #define SPR_PCMR_LSUS	0x00000400  /* LSU stall event */
579 #define SPR_PCMR_BS	0x00000800  /* Branch stall event */
580 #define SPR_PCMR_DTLBM	0x00001000  /* DTLB miss event */
581 #define SPR_PCMR_ITLBM	0x00002000  /* ITLB miss event */
582 #define SPR_PCMR_DDS	0x00004000  /* Data dependency stall event */
583 #define SPR_PCMR_WPE	0x03ff8000  /* Watchpoint events */
584 
585 /*
586  * Bit definitions for the Power management register
587  *
588  */
589 #define SPR_PMR_SDF	0x0000000f  /* Slow down factor */
590 #define SPR_PMR_DME	0x00000010  /* Doze mode enable */
591 #define SPR_PMR_SME	0x00000020  /* Sleep mode enable */
592 #define SPR_PMR_DCGE	0x00000040  /* Dynamic clock gating enable */
593 #define SPR_PMR_SUME	0x00000080  /* Suspend mode enable */
594 
595 /*
596  * Bit definitions for PICMR
597  *
598  */
599 #define SPR_PICMR_IUM	0xfffffffc  /* Interrupt unmask */
600 
601 /*
602  * Bit definitions for PICPR
603  *
604  */
605 #define SPR_PICPR_IPRIO	0xfffffffc  /* Interrupt priority */
606 
607 /*
608  * Bit definitions for PICSR
609  *
610  */
611 #define SPR_PICSR_IS	0xffffffff  /* Interrupt status */
612 
613 /*
614  * Bit definitions for Tick Timer Control Register
615  *
616  */
617 #define SPR_TTCR_PERIOD	0x0fffffff  /* Time Period */
618 #define SPR_TTMR_PERIOD	SPR_TTCR_PERIOD
619 #define SPR_TTMR_IP	0x10000000  /* Interrupt Pending */
620 #define SPR_TTMR_IE	0x20000000  /* Interrupt Enable */
621 #define SPR_TTMR_RT	0x40000000  /* Restart tick */
622 #define SPR_TTMR_SR     0x80000000  /* Single run */
623 #define SPR_TTMR_CR     0xc0000000  /* Continuous run */
624 #define SPR_TTMR_M      0xc0000000  /* Tick mode */
625 
626 #define SPR_TTUR_ICTM   0x00000001  /* Instruction Counting Timer Mode */
627 
628 /*
629  * l.nop constants
630  * (immediate values recognized by the simulator's l.nop handling)
631  */
632 #define NOP_NOP         0x0000      /* Normal nop instruction */
633 #define NOP_EXIT        0x0001      /* End of simulation */
634 #define NOP_REPORT      0x0002      /* Simple report */
635 #define NOP_PRINTF      0x0003      /* Simprintf instruction */
636 #define NOP_CNT_RESET   0x0005	    /* Reset statistics counters */
637 #define NOP_SIM_SYS_CALL 0x0006     /* Simulator syscall emulation */
/* NOTE(review): FIRST (0x0400) is greater than LAST (0x03ff), so the
 * numbered-report range is empty as written; upstream or1ksim uses
 * 0x0600 for LAST -- confirm whether this is intentional. */
638 #define NOP_REPORT_FIRST 0x0400     /* Report with number */
639 #define NOP_REPORT_LAST 0x03ff      /* Report with number */
640 
641 
/* Default HAZARD wrapper is the identity; presumably overridden by
 * builds that need hazard/delay padding around x -- confirm. */
642 #ifndef HAZARD
643 #define HAZARD(x)       x
644 #endif
645 
/* GCC statement-expression wrappers around the l.ff1 / l.fl1
 * find-first/last-set-bit instructions.
 * NOTE(review): argument x is used unparenthesized in the initializer
 * below; callers must not pass comma expressions. */
646 #define L_FF1(x)                                            \
647 ({  int _res_, _x_ = x; __asm__ volatile ("l.ff1\t%0,%1" : "=r"(_res_) : "r"(_x_)); _res_; })
648 
649 #define L_FL1(x)                                            \
650 ({  int _res_, _x_ = x; __asm__ volatile ("l.fl1\t%0,%1" : "=r"(_res_) : "r"(_x_)); _res_; })
651 
652 /* Move from architecture special register (SPR) */
/* "K" is the immediate-constant asm constraint for the SPR number.
 * NOTE(review): the __l_mfspr__/__l_mtspr__ definitions end in a stray
 * ';' (unlike the *1* variants), so "__l_mfspr__(...);" expands to two
 * statements -- harmless in statement position, but inconsistent. */
653 #define __l_mfspr__(result, spr_grp, spr_reg)               \
654     __asm__ volatile ("l.mfspr\t%0,%1,%2" : "=r"(result) : "r"(spr_reg), "K"(spr_grp));
655 #define __l_mfspr1__(result, spr)                           \
656     __asm__ volatile ("l.mfspr\t%0,r0,%1" : "=r"(result) : "K"(spr))
657 #define __l_mtspr__(spr_grp, spr_reg, val)                  \
658     __asm__ volatile ("l.mtspr\t%0,%1,%2" : : "r"(spr_reg), "r"(val), "K"(spr_grp));
659 #define __l_mtspr1__(spr,val)                               \
660     __asm__ volatile ("l.mtspr\tr0,%0,%1" : : "r"(val), "K"(spr))
661 
/* Read an SPR given a group base and an in-group register index. */
662 #define L_MFSPR(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_) \
663 ({  int _res_; __l_mfspr__(_res_, _spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_); _res_; })
664 
/* Read an SPR given its full 16-bit address. */
665 #define L_MFSPR1(_spr_register_16bit_const_)                \
666 ({  int _res_; __l_mfspr1__(_res_, _spr_register_16bit_const_); _res_; })
667 
668 /* Move data to architecture special registers (SPR) */
669 #define L_MTSPR(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_, _val_) \
670     __l_mtspr__(_spr_group_5bit_shifted_by_11bit_, _spr_register_11bit_const_, _val_)
671 
672 #define L_MTSPR1(_spr_register_16bit_const_, _val_)         \
673     __l_mtspr1__(_spr_register_16bit_const_, _val_)
674 
675 /* Enhancement to existing multiply (keep full 64-bit) */
676 #define __l_mul__(lo, a,b)                  \
677    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.mul\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
678 #define __l_muli__(lo, a,b)                 \
679    { int _a_ = (a); __asm__ volatile ("l.muli\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "I"(b)); }
680 #define __l_mulu__(lo, a,b)                 \
681    { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.mulu\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
682 
/* signed 32x32 to signed 64-bit multiply: low word from l.mul, high
 * word read back from SPR_MACHI2. */
683 #define L_MULL(a,b)                         \
684 ({  int _lo_; __l_mul__(_lo_, a,b);         \
685     int _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
686     ((long long)_hi_<<32)|(unsigned)_lo_; })
687 
688 #define L_MULL_LO(a,b)                      \
689 ({  int _lo_; __l_mul__(_lo_, a,b); _lo_; })
690 
/* As L_MULL but with an immediate second operand (l.muli). */
691 #define L_MULLI(a,b)                        \
692 ({  int _lo_; __l_muli__(_lo_, a,b);        \
693     int _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
694     ((long long)_hi_<<32)|(unsigned)_lo_; })
695 
696 #define L_MULLI_LO(a,b)                     \
697 ({  int _lo_; __l_muli__(_lo_, a,b); _lo_; })
698 
/* Unsigned 32x32 to 64-bit multiply via l.mulu / SPR_MACHI2. */
699 #define L_MULLU(a,b)                        \
700 ({  unsigned _lo_; __l_mulu__(_lo_, a,b);   \
701     unsigned _hi_; __l_mfspr1__(_hi_, SPR_MACHI2); \
702     ((unsigned long long)_hi_<<32)|(unsigned)_lo_; })
703 
704 #define L_MULLU_LO(a,b)                     \
705 ({  int _lo_; __l_mulu__(_lo_, a,b); _lo_; })
706 
707 
708 #define __l_amull__(lo, a,b)                \
709    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amull\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
710 #define __l_amullu__(lo, a,b)                \
711    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amullu\t%0,%1,%2" : "=r"(lo) : "r"(_a_), "r"(_b_)); }
712 
713 /* Pipelined version of signed 32x32 multiply to 64-bit */
714 #define L_AMULL(a,b)                        \
715 ({  int _lo_; __l_amull__(_lo_, a,b);       \
716     int _hi_; __l_mfspr1__(_hi_, SPR_AMULHI); \
717     ((long long)_hi_<<32)|(unsigned)_lo_; })
718 
719 #define L_AMULL_LO(a,b)                     \
720 ({  int _lo_; __l_amull__(_lo_, a,b); _lo_; })
721 
722 /* Pipelined version of unsigned 32x32 multiply to 64-bit  */
723 #define L_AMULLU(a,b)                       \
724 ({  unsigned _lo_; __l_amullu__(_lo_, a,b); \
725     unsigned _hi_; __l_mfspr1__(_hi_, SPR_AMULHI); \
726     ((unsigned long long)_hi_<<32)|(unsigned)_lo_; })
727 
728 #define L_AMULLU_LO(a,b)                    \
729 ({  unsigned _lo_; __l_amullu__(_lo_, a,b); _lo_; })
731 
732 
733 #define AMAC0       0
734 #define AMAC1       1
735 #define AMAC_Q0     0
736 #define AMAC_Q1     1
737 
738 #define __l_amul__(m, a,b)                      \
739    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amul\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
740 #define __l_amac__(m, a,b)                      \
741    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amac\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
742 #define __l_amsb__(m, a,b)                      \
743    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amsb\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
744 #define __l_amulq__(m, a,q)                     \
745    { int _a_ = (a); __asm__ volatile ("l.amulq\ta" #m ",%0,q" #q : : "r"(_a_)); }
746 #define __l_amacq__(m, a,q)                     \
747    { int _a_ = (a); __asm__ volatile ("l.amacq\ta" #m ",%0,q" #q : : "r"(_a_)); }
748 #define __l_amsbq__(m, a,q)                     \
749    { int _a_ = (a); __asm__ volatile ("l.amsbq\ta" #m ",%0,q" #q : : "r"(_a_)); }
750 
751 #define __l_amulu__(m, a,b)                      \
752    { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.amulu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
753 #define __l_amacu__(m, a,b)                      \
754    { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.amacu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
755 #define __l_amsbu__(m, a,b)                      \
756    { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.amsbu\ta" #m ",%0,%1" : : "r"(_a_), "r"(_b_)); }
757 #define __l_amuluq__(m, a,q)                     \
758    { unsigned _a_ = (a); __asm__ volatile ("l.amulqu\ta" #m ",%0,q" #q : : "r"(_a_)); }
759 #define __l_amacuq__(m, a,q)                     \
760    { unsigned _a_ = (a); __asm__ volatile ("l.amacqu\ta" #m ",%0,q" #q : : "r"(_a_)); }
761 #define __l_amsbuq__(m, a,q)                     \
762    { unsigned _a_ = (a); __asm__ volatile ("l.amsbqu\ta" #m ",%0,q" #q : : "r"(_a_)); }
763 
764 #define __l_amul_rs__(lo, m,a,b, bits)          \
765    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amul_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
766 #define __l_amac_rs__(lo, m,a,b, bits)          \
767    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amac_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
768 #define __l_amsb_rs__(lo, m,a,b, bits)          \
769    { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.amsb_rs\t%0,a" #m ",%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
770 #define __l_amulq_rs__(lo, m,a,q, bits)         \
771    { int _a_ = (a); __asm__ volatile ("l.amulq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); }
772 #define __l_amacq_rs__(lo, m,a,q, bits)         \
773    { int _a_ = (a); __asm__ volatile ("l.amacq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); }
774 #define __l_amsbq_rs__(lo, m,a,q, bits)         \
775    { int _a_ = (a); __asm__ volatile ("l.amsbq_rs\t%0,a" #m ",%1,q" #q ",%2": "=r"(lo) : "r"(_a_), "K"(bits)); }
776 
/* Accumulator read-back primitives:
 *  - __l_amacr_lo__:  read a<m> low word applying a CLR|RND|SHT|SAT 'mode'
 *    mask and a 'bits' shift amount (both compile-time constants).
 *  - __l_mfspr_amachi__ / __l_mfspr_amacguard__: raw SPR reads of the
 *    accumulator high word and guard bits (SPR numbers defined elsewhere
 *    in this header). */
#define __l_amacr_lo__(m, lo, mode, bits)       \
    __asm__ volatile ("l.amacr\t%0,a" #m ",%1,%2\t# CLR|RND|SHT|SAT" : "=r"(lo) : "K"(mode), "K"(bits))
#define __l_mfspr_amachi__(m, hi)               \
    __asm__ volatile ("l.mfspr\t%0,r0,%1\t# amac\ta" #m "hi" : "=r"(hi) : "K"(SPR_AMACHI0+2*m))
#define __l_mfspr_amacguard__(m, g)            \
    __asm__ volatile ("l.mfspr\t%0,r0,%1\t# amac\ta" #m "g" : "=r"(g) : "K"(SPR_AMACGUARD0+m))
783 
/* Pipelined version of signed 32x32 multiply to a 72-bit MAC */
#define L_AMUL(m, a,b)  __l_amul__(m, a,b)
#define L_AMULU(m, a,b) __l_amulu__(m, a,b)
#define L_AMUL_RS_LO(m, a,b, bits)  ({ int _lo_; __l_amul_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined version of signed 32x32 multiply accumulated to a 72-bit MAC */
#define L_AMAC(m, a,b)  __l_amac__(m, a,b)
#define L_AMACU(m, a,b) __l_amacu__(m, a,b)
#define L_AMAC_RS_LO(m, a,b, bits)  ({ int _lo_; __l_amac_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined version of signed 32x32 multiply subtracted from a 72-bit MAC */
#define L_AMSB(m, a,b)  __l_amsb__(m, a,b)
#define L_AMSBU(m, a,b) __l_amsbu__(m, a,b)
#define L_AMSB_RS_LO(m, a,b, bits)  ({ int _lo_; __l_amsb_rs__(_lo_, m,a,b, bits); _lo_; })

/* Pipelined version of signed 32x32 register * amacq multiply to a 72-bit MAC */
#define L_AMULQ(m, a,q)   __l_amulq__(m, a,q)
#define L_AMULUQ(m, a,q)  __l_amuluq__(m, a,q)
#define L_AMULQ_RS_LO(m, a,q, bits)  ({ int _lo_; __l_amulq_rs__(_lo_, m,a,q, bits); _lo_; })

/* Pipelined version of signed 32x32 register * amacq multiply accumulated to a 72-bit MAC */
#define L_AMACQ(m, a,q)   __l_amacq__(m, a,q)
#define L_AMACUQ(m, a,q)  __l_amacuq__(m, a,q)
#define L_AMACQ_RS_LO(m, a,q, bits)  ({ int _lo_; __l_amacq_rs__(_lo_, m,a,q, bits); _lo_; })

/* Pipelined version of signed 32x32 register * amacq multiply subtracted from a 72-bit MAC */
#define L_AMSBQ(m, a,q)   __l_amsbq__(m, a,q)
#define L_AMSBUQ(m, a,q)  __l_amsbuq__(m, a,q)
#define L_AMSBQ_RS_LO(m, a,q, bits)  ({ int _lo_; __l_amsbq_rs__(_lo_, m,a,q, bits); _lo_; })
813 
/* Pipelined version of setting a 72-bit MAC.
 * L_AMACW loads a<m> from a hi/lo register pair; L_AMACW0 zeroes it. */
#define L_AMACW(m, hi, lo)                  \
   { int _hi_ = (hi), _lo_ = (lo); __asm__ volatile ("l.amacw\ta" #m ",%0,%1" : : "r"(_hi_), "r"(_lo_)); }
#define L_AMACW0(m)                         \
   { __asm__ volatile ("l.amacw\ta" #m ",r0,r0"); }

/* Pipelined version of reading a 72-bit MAC, optional round, shift, saturate, and clear (in that order)*/
/* Mode-mask bits for l.amacr (combine with |). */
#define AMACR_CLR   8   // clear
#define AMACR_RND   4   // round
#define AMACR_SAT   2   // saturate
#define AMACR_SHR   1   // shift

#if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both cases, _hi_ isn't what we want
/* Disabled: intended to return the full (hi<<32)|lo accumulator value. */
#define L_AMACR(m, mode, bits)              \
({  int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })
#endif // 0
832 
/* Read the low word of accumulator a<m>; 'mode' is a mask of AMACR_* bits,
 * 'bits' the right-shift amount.  Both must be compile-time constants. */
#define L_AMACR_LO(m, mode, bits)           \
({  int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })
835 
/* Read the high word of accumulator a<m> via its SPR.
 * Fix: the statement expression returned the undeclared identifier '_hi'
 * instead of '_hi_', so any use of this macro failed to compile. */
#define L_MFSPR_AMACHI(m)                   \
({  int _hi_; __l_mfspr_amachi__(m, _hi_); _hi_; })
838 
/* Read the guard bits of accumulator a<m> via its SPR.
 * Fix: the statement expression returned the undeclared identifier '_g'
 * instead of '_g_', so any use of this macro failed to compile. */
#define L_MFSPR_GUARD(m)                    \
({  int _g_; __l_mfspr_amacguard__(m, _g_); _g_; })
841 
842 
/* Double-load */
/* Load a word pair: 'ldww' is the mnemonic string, 'q' the destination
 * q-register index, 'mask' a constant lane mask, 'offset' a constant
 * displacement from 'addr'.  The scalar part lands in 'r'. */
#define __l_ldww__(ldww, q, mask, r, addr, offset)    \
    __asm__ volatile (ldww "\t%0,%1(%2),q" #q ",%3" : "=r"(r) : "I"(offset), "r"(addr), "K"(mask));

#define L_LDWW(q, mask, addr, offset)       \
({  int _r_; __l_ldww__("l.ldww", q, mask, _r_, addr, offset); _r_; })

#define L_LDWWX(q, mask, addr, offset)      \
({  int _r_; __l_ldww__("l.ldwwx", q, mask, _r_, addr, offset); _r_; })

#define L_LDWW2(q, mask, addr, offset)      \
({  int _r_; __l_ldww__("l.ldww2", q, mask, _r_, addr, offset); _r_; })

#define L_LDWW2X(q, mask, addr, offset)     \
({  int _r_; __l_ldww__("l.ldww2x", q, mask, _r_, addr, offset); _r_; })
858 
859 
/* AMAC combo */
/* Disabled 72-bit-result variants; see the explanation on the #if 0 line. */
#if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both cases, _hi_ isn't what we want
#define L_AMAC_p(m, a1,b1, mode, bits)      \
({  __l_amul__(m, a1,b1);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })

#define L_AMACQ_p(m, a1, q, mode, bits)     \
({  __l_amulq__(m, a1,q);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })

#define L_AMAC_pp(m, a1,b1, a2,b2, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })
#endif // 0
884 
/* Fused multiply-accumulate sequences returning the accumulator low word.
 * The suffix letters encode the operation chain: the first product is always
 * an l.amul (loads a<m>), then 'p' = l.amac (add product), 'm' = l.amsb
 * (subtract product), 'q' = l.amacq (q-register product).  HAZARD() inserts
 * a pipeline nop before the l.amacr read-back. */
#define L_AMAC_p_LO(m, a1,b1, mode, bits)   \
({  __l_amul__(m, a1,b1);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMACQ_p_LO(m, a1, q, mode, bits)  \
({  __l_amulq__(m, a1,q);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pp_LO(m, a1,b1, a2,b2, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

/* Unsigned variant: note the unsigned _lo_ result. */
#define L_AMACU_pp_LO(m, a1,b1, a2,b2, mode, bits) \
({  __l_amulu__(m, a1,b1);                  \
    __l_amacu__(m, a2,b2);                  \
    HAZARD(__asm__ volatile ("l.nop"));     \
    unsigned _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pq_LO(m, a1,b1, a2,q2, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amacq__(m, a2,q2);                  \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pm_LO(m, a1,b1, a2,b2, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_ppp_LO(m, a1,b1, a2,b2, a3,b3, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_ppm_LO(m, a1,b1, a2,b2, a3,b3, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pmm_LO(m, a1,b1, a2,b2, a3,b3, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_ppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    __l_amsb__(m, a4,b4);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pmpm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amsb__(m, a4,b4);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pmmp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_ppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amsb__(m, a4,b4);                   \
    __l_amsb__(m, a5,b5);                   \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })

#define L_AMAC_pppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, a9,b9, a10,b10, mode, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    __l_amac__(m, a6,b6);                   \
    __l_amac__(m, a7,b7);                   \
    __l_amac__(m, a8,b8);                   \
    __l_amac__(m, a9,b9);                   \
    __l_amac__(m, a10,b10);                 \
    HAZARD(__asm__ volatile ("l.nop"));     \
    int _lo_; __l_amacr_lo__(m, _lo_, mode, bits); _lo_; })
1003 
/* AMAC combo: 1 instruction short, always shift-rounding */
/* Disabled 72-bit-result variants; see the explanation on the #if 0 line. */
#if 0 // doesn't work because SPR_AMACHI/LO isn't updated when !AMACR_CLR, or cleared when AMACR_CLR.  In both cases, _hi_ isn't what we want
#define L_AMAC_RS_p(m, a1,b1, bits)         \
({  int _lo_; __l_amul_rs__(_lo_, m,a1,b1, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })

#define L_AMACQ_RS_p(m, a1, q, bits)        \
({  int _lo_; __l_amulq_rs__(_lo_, m,a1,q, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })

#define L_AMAC_RS_pp(m, a1,b1, a2,b2, bits) \
({  __l_amul__(m, a1,b1);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a2,b2, bits); \
    int _hi_; __l_mfspr_amachi__(m, _hi_);  \
    ((long long)_hi_<<32)|(unsigned)_lo_; })
#endif // 0
1022 
/* Shift-round ("_rs") fused sequences: same p/m/q suffix encoding as the
 * L_AMAC_*_LO family above, but the final product instruction also reads the
 * accumulator back shifted by 'bits' -- one instruction shorter and no
 * HAZARD nop is needed.  The 'n' suffix is a q-register l.amsbq_rs. */
#define L_AMAC_RS_p_LO(m, a1,b1, bits)      \
({  int _lo_; __l_amul_rs__(_lo_, m,a1,b1, bits); _lo_; })

#define L_AMACQ_RS_p_LO(m, a1, q, bits)     \
({  int _lo_; __l_amulq_rs__(_lo_, m,a1,q, bits); _lo_; })

#define L_AMAC_RS_pp_LO(m, a1,b1, a2,b2, bits) \
({  __l_amul__(m, a1,b1);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a2,b2, bits); _lo_; })

#define L_AMAC_RS_pq_LO(m, a1,b1, a2,q2, bits) \
({  __l_amul__(m, a1,b1);                   \
    int _lo_; __l_amacq_rs__(_lo_, m,a2,q2, bits); _lo_; })

#define L_AMAC_RS_qq_LO(m, a1,q1, a2,q2, bits) \
({  __l_amulq__(m, a1,q1);                  \
    int _lo_; __l_amacq_rs__(_lo_, m,a2,q2, bits); _lo_; })

#define L_AMAC_RS_pm_LO(m, a1,b1, a2,b2, bits) \
({  __l_amul__(m, a1,b1);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a2,b2, bits); _lo_; })

#define L_AMAC_RS_qn_LO(m, a1,q1, a2,q2, bits) \
({  __l_amulq__(m, a1,q1);                   \
    int _lo_; __l_amsbq_rs__(_lo_, m,a2,q2, bits); _lo_; })

#define L_AMAC_RS_ppp_LO(m, a1,b1, a2,b2, a3,b3, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a3,b3, bits); _lo_; })

#define L_AMAC_RS_ppm_LO(m, a1,b1, a2,b2, a3,b3, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a3,b3, bits); _lo_; })

#define L_AMAC_RS_pmm_LO(m, a1,b1, a2,b2, a3,b3, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a3,b3, bits); _lo_; })

#define L_AMAC_RS_pppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a4,b4, bits); _lo_; })

#define L_AMAC_RS_ppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a4,b4, bits); _lo_; })

#define L_AMAC_RS_pmpm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a4,b4, bits); _lo_; })

#define L_AMAC_RS_pmmp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amsb__(m, a2,b2);                   \
    __l_amsb__(m, a3,b3);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a4,b4, bits); _lo_; })

#define L_AMAC_RS_ppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a5,b5, bits); _lo_; })

#define L_AMAC_RS_pppmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amsb__(m, a4,b4);                   \
    int _lo_; __l_amsb_rs__(_lo_, m,a5,b5, bits); _lo_; })

#define L_AMAC_RS_pppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a6,b6, bits); _lo_; })

#define L_AMAC_RS_pppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    __l_amac__(m, a6,b6);                   \
    __l_amac__(m, a7,b7);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a8,b8, bits); _lo_; })

#define L_AMAC_RS_pppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, a9,b9, a10,b10, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    __l_amac__(m, a6,b6);                   \
    __l_amac__(m, a7,b7);                   \
    __l_amac__(m, a8,b8);                   \
    __l_amac__(m, a9,b9);                   \
    int _lo_; __l_amac_rs__(_lo_, m,a10,b10, bits); _lo_; })

#define L_AMAC_RS_ppppppppmmmmmmmm_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, \
                            a9,b9, a10,b10, a11,b11, a12,b12, a13,b13, a14,b14, a15,b15, a16,b16, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    __l_amac__(m, a6,b6);                   \
    __l_amac__(m, a7,b7);                   \
    __l_amac__(m, a8,b8);                   \
    __l_amsb__(m, a9,b9);                   \
    __l_amsb__(m, a10,b10);                 \
    __l_amsb__(m, a11,b11);                 \
    __l_amsb__(m, a12,b12);                 \
    __l_amsb__(m, a13,b13);                 \
    __l_amsb__(m, a14,b14);                 \
    __l_amsb__(m, a15,b15);                 \
    int _lo_; __l_amsb_rs__(_lo_, m,a16,b16, bits); _lo_; })

#define L_AMAC_RS_pppppppppppppppp_LO(m, a1,b1, a2,b2, a3,b3, a4,b4, a5,b5, a6,b6, a7,b7, a8,b8, \
                            a9,b9, a10,b10, a11,b11, a12,b12, a13,b13, a14,b14, a15,b15, a16,b16, bits) \
({  __l_amul__(m, a1,b1);                   \
    __l_amac__(m, a2,b2);                   \
    __l_amac__(m, a3,b3);                   \
    __l_amac__(m, a4,b4);                   \
    __l_amac__(m, a5,b5);                   \
    __l_amac__(m, a6,b6);                   \
    __l_amac__(m, a7,b7);                   \
    __l_amac__(m, a8,b8);                   \
    __l_amac__(m, a9,b9);                   \
    __l_amac__(m, a10,b10);                 \
    __l_amac__(m, a11,b11);                 \
    __l_amac__(m, a12,b12);                 \
    __l_amac__(m, a13,b13);                 \
    __l_amac__(m, a14,b14);                 \
    __l_amac__(m, a15,b15);                 \
    int _lo_; __l_amac_rs__(_lo_, m,a16,b16, bits); _lo_; })
1169 
/* Divide with a left pre-shift of the dividend by 'bits' (constant).
 * Signed (l.divl) and unsigned (l.divlu) variants. */
#define __l_divl__(lo, a,b, bits)           \
   { int _a_ = (a), _b_ = (b); __asm__ volatile ("l.divl\t%0,%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }
#define __l_divlu__(lo, a,b, bits)          \
   { unsigned _a_ = (a), _b_ = (b); __asm__ volatile ("l.divlu\t%0,%1,%2,%3" : "=r"(lo) : "r"(_a_), "r"(_b_), "K"(bits)); }

/* Left-shift before divide */
#define L_DIVL(a,b, bits)  ({ int _lo_; __l_divl__(_lo_, a,b, bits); _lo_; })
#define L_DIVLU(a,b, bits)  ({ unsigned _lo_; __l_divlu__(_lo_, a,b, bits); _lo_; })
1178 
/* Arithmetic-Shift-Right & Clip to 16-bit */

/* Shift _t_ right arithmetically, then clamp to [-32768, 32767] using
 * compare-and-set-flag (l.sfgesi/l.sflesi) followed by conditional move.
 * NOTE(review): the sf* and l.cmov statements communicate through the flag
 * register with no operand dependency visible to the compiler; this assumes
 * the compiler does not reorder or interleave them -- confirm, or fold each
 * compare/cmov pair into a single asm block. */
#define __SRA_CLIP16(shift_right, t)                                    \
({  int _u_, _t_ = t;                                                   \
    __asm__("l.srai     %0,%0,%1"     : "+r"(_t_)  : "K"(shift_right)); \
    __asm__("l.sfgesi   %0,-32768"    :          : "r"(_t_));           \
    __asm__("l.addi     %0,r0,-32768" : "=r"(_u_)  :       );           \
    __asm__("l.cmov     %0,%0,%1"     : "+r"(_t_)  : "r"(_u_));         \
    __asm__("l.sflesi   %0,32767"     :          : "r"(_t_));           \
    __asm__("l.addi     %0,r0,32767"  : "=r"(_u_)  :       );           \
    __asm__("l.cmov     %0,%0,%1"     : "+r"(_t_)  : "r"(_u_));         \
    _t_; })

/* Clamp to [-32768, 32767] without the pre-shift; same caveat as above. */
#define __CLIP16(t)                                                     \
({  int _u_, _t_ = t;                                                   \
    __asm__("l.sfgesi   %0,-32768"    :          : "r"(_t_));           \
    __asm__("l.addi     %0,r0,-32768" : "=r"(_u_)  :       );           \
    __asm__("l.cmov     %0,%0,%1"     : "+r"(_t_)  : "r"(_u_));         \
    __asm__("l.sflesi   %0,32767"     :          : "r"(_t_));           \
    __asm__("l.addi     %0,r0,32767"  : "=r"(_u_)  :       );           \
    __asm__("l.cmov     %0,%0,%1"     : "+r"(_t_)  : "r"(_u_));         \
    _t_; })

/* Preferred forms: single round-shift (l.srari) plus saturate (l.sat). */
#define __SRA_CLIP16_ex(shift_right, t)     \
({  int _result_, _t_ = t;                  \
    __asm__ volatile ("l.srari\t%0,%1,%2" : "=r"(_result_) : "r"(_t_), "K"(shift_right) ); \
    __asm__ volatile ("l.sat\t%0,%1,16" : "=r"(_result_) : "r"(_result_) ); \
    _result_;})

#define __CLIP16_ex(t)      \
({  int _result_, _t_ = t;  \
    __asm__ volatile ("l.sat\t%0,%1,16" : "=r"(_result_) : "r"(_t_) ); \
    _result_;})
1212 
/* Extract, Deposit, and Saturate */

/* Bit-field extract: 'width' bits at 'bitpos'.  Naming convention:
 *   no suffix = signed, U = unsigned (zero-extended),
 *   I = immediate bit position, R = register width and bit position. */
#define L_EXT(v, width, bitpos)             \
({  int _result_, _v_ = v, _b_ = bitpos;    \
    __asm__ volatile ("l.ext\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "r"(_b_)); \
    _result_;})

#define L_EXTI(v, width, bitpos)            \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.exti\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "K"(bitpos)); \
    _result_;})

#define L_EXTU(v, width, bitpos)            \
({  int _result_, _v_ = v, _b_ = bitpos;    \
    __asm__ volatile ("l.extu\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "r"(_b_)); \
    _result_;})

#define L_EXTUI(v, width, bitpos)           \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.extui\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "K"(width), "K"(bitpos)); \
    _result_;})

#define L_EXTR(v, width, bitpos)            \
({  int _result_, _v_ = v, _width_ = width, _b_ = bitpos;    \
    __asm__ volatile ("l.extr\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "r"(_b_)); \
    _result_;})

#define L_EXTRI(v, width, bitpos)           \
({  int _result_, _v_ = v, _width_ = width; \
    __asm__ volatile ("l.extri\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
    _result_;})

#define L_EXTRU(v, width, bitpos)           \
({  int _result_, _v_ = v, _width_ = width, _b_ = bitpos; \
    __asm__ volatile ("l.extru\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "r"(_b_)); \
    _result_;})

#define L_EXTRUI(v, width, bitpos)          \
({  int _result_, _v_ = v, _width_ = width; \
    __asm__ volatile ("l.extrui\t%0,%1,%2,%3" : "=r"(_result_) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
    _result_;})
1254 
/* Deposit 'width' bits of v into d at register bit position; updates d
 * in place and yields the new value. */
#define L_DEP(d, v, width, bitpos)          \
({  int _v_ = v, _b_ = bitpos;              \
    __asm__ volatile ("l.dep\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
    d;})
1259 
/* Deposit 'width' bits of v into d at immediate bit position; updates d in
 * place and yields the new value.
 * Fix: 'd' was referenced in the body but missing from the parameter list
 * (compare L_DEP/L_DEPR/L_SATDEPI); the macro could only compile by
 * accidentally capturing a caller variable literally named 'd'. */
#define L_DEPI(d, v, width, bitpos)         \
({  int _v_ = v;                            \
    __asm__ volatile ("l.depi\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
    d;})
1264 
/* Deposit with both width and bit position taken from registers. */
#define L_DEPR(d, v, width, bitpos)         \
({  int _v_ = v, _width_ = width, _b_ = bitpos; \
    __asm__ volatile ("l.depr\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "r"(_width_), "r"(_b_)); \
    d;})
1269 
/* Deposit with register width and immediate bit position; updates d in place
 * and yields the new value.
 * Fix: 'd' was referenced in the body but missing from the parameter list
 * (compare L_DEP/L_DEPR/L_SATDEPUI); the macro could only compile by
 * accidentally capturing a caller variable literally named 'd'. */
#define L_DEPRI(d, v, width, bitpos)        \
({  int _v_ = v, _width_ = width;           \
    __asm__ volatile ("l.depri\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "r"(_width_), "K"(bitpos)); \
    d;})
1274 
/* Deposit the overflow flag into d. */
#define L_DEPOV(d)                          \
({  __asm__ volatile ("l.depov\t%0" : "+r"(d) : ); \
    d;})

/* Saturate v to 'width' bits.  Suffix convention:
 *   SAT = signed->signed, SATU = unsigned->unsigned,
 *   SATSU = signed->unsigned, SATUS = unsigned->signed. */
#define L_SAT(v, width)                     \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.sat\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
    _result_;})

#define L_SATU(v, width)                    \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.satu\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
    _result_;})

#define L_SATSU(v, width)                   \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.satsu\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
    _result_;})

#define L_SATUS(v, width)                   \
({  int _result_, _v_ = v;                  \
    __asm__ volatile ("l.satus\t%0,%1,%2" : "=r"(_result_) : "r"(_v_), "K"(width)); \
    _result_;})

/* Saturate-then-deposit into d; I = immediate bit position, U = unsigned. */
#define L_SATDEP(d, v, width, bitpos)       \
({  int _v_ = v, _b_ = bitpos;    \
    __asm__ volatile ("l.satdep\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
    d;})

#define L_SATDEPI(d, v, width, bitpos)      \
({  int _v_ = v;                  \
    __asm__ volatile ("l.satdepi\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
    d;})

#define L_SATDEPU(d, v, width, bitpos)      \
({  int _v_ = v, _b_ = bitpos;    \
    __asm__ volatile ("l.satdepu\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "r"(_b_)); \
    d;})

#define L_SATDEPUI(d, v, width, bitpos)     \
({  int _v_ = v;                  \
    __asm__ volatile ("l.satdepui\t%0,%1,%2,%3" : "+r"(d) : "r"(_v_), "K"(width), "K"(bitpos)); \
    d;})
1318 
/* FFT */
/* Kick off the FFT accelerator on size-2^n real/imaginary buffers.
 * Asynchronous: poll completion with L_WAIT_FOR_FFT (uses L_MFSPR1 and
 * SPR_ACCEL_FFT, defined elsewhere in this header). */
#define __l_fft__(fft, n, re,im, mode)      \
   { void *_re_ = (void*)(re), *_im_ = (void*)(im); __asm__ volatile (fft "\t%0,%1,%2,%3" :  : "K"(n), "r"(_re_), "r"(_im_), "K"(mode)); }

#define L_FFT(n, re,im, mode)       __l_fft__("l.fft", n, re,im, mode)
#define L_FFTR(n, re,im, mode)      __l_fft__("l.fftr", n, re,im, mode)
#define L_IFFT(n, re,im, mode)      __l_fft__("l.ifft", n, re,im, mode)
#define L_IFFTR(n, re,im, mode)     __l_fft__("l.ifftr", n, re,im, mode)
#define L_WAIT_FOR_FFT  while(L_MFSPR1(SPR_ACCEL_FFT) != 0);
1328 
/* ABS/COUNT1/BYTESWAP/BITSWAP/MAX/MIN single-instruction helpers.
 * U = unsigned compare, I = immediate second operand ("I" constraint). */
#define L_ABS(a)                            \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.abs\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
    _res_;})

#define L_COUNT1(a)                         \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.count1\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
    _res_;})

#define L_BYTESWAP(a)                       \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.byteswap\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
    _res_;})

#define L_BITSWAP(a)                        \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.bitswap\t%0,%1" : "=r"(_res_) : "r"(_a_)); \
    _res_;})

#define L_MAX(a,b)                          \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.max\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})

#define L_MAXU(a,b)                         \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.maxu\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})

#define L_MIN(a,b)                          \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.min\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})

#define L_MINU(a,b)                         \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.minu\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})

#define L_MAXI(a,b)                         \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.maxi\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
    _res_;})

#define L_MAXUI(a,b)                        \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.maxui\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
    _res_;})

#define L_MINI(a,b)                         \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.mini\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
    _res_;})

#define L_MINUI(a,b)                        \
({  int _res_, _a_ = a;                     \
    __asm__ volatile ("l.minui\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "I"(b)); \
    _res_;})
1389 
/* Bi-directional shift arithmetic/logical */
/* Shift a by b where b's sign selects the direction. */
#define L_BSA(a,b)                          \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.bsa\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})

#define L_BSL(a,b)                          \
({  int _res_, _a_ = a, _b_ = b;            \
    __asm__ volatile ("l.bsl\t%0,%1,%2" : "=r"(_res_) : "r"(_a_), "r"(_b_)); \
    _res_;})
1400 
/* Double-shift */
/* Funnel shift of the a:b pair by cnt; a receives the result.
 * NOTE(review): L_DSLI/L_DSRI emit the same "l.dsl"/"l.dsr" mnemonics with a
 * constant ("K") count -- confirm the assembler accepts the immediate form,
 * or whether these should emit l.dsli/l.dsri. */
#define L_DSL(a,b,cnt)                      \
({  int _a_ = a, _b_ = b, _cnt_ = cnt;      \
    __asm__ volatile ("l.dsl\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "r"(_cnt_)); \
    _a_;})

#define L_DSR(a,b,cnt)                      \
({  int _a_ = a, _b_ = b, _cnt_ = cnt;      \
    __asm__ volatile ("l.dsr\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "r"(_cnt_)); \
    _a_;})

#define L_DSLI(a,b,cnt)                     \
({  int _a_ = a, _b_ = b;                   \
    __asm__ volatile ("l.dsl\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "K"(cnt)); \
    _a_;})

#define L_DSRI(a,b,cnt)                     \
({  int _a_ = a, _b_ = b;                   \
    __asm__ volatile ("l.dsr\t%0,%1,%2" : "+r"(_a_) : "r"(_b_), "K"(cnt)); \
    _a_;})
1421 
1422 #endif /* __aeon__ */
1423 
1424 #endif /* __SPR_DEFS_H */
1425