xref: /OK3568_Linux_fs/kernel/arch/x86/net/bpf_jit_comp32.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun // SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun /*
3*4882a593Smuzhiyun  * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
4*4882a593Smuzhiyun  *
5*4882a593Smuzhiyun  * Author: Wang YanQing (udknight@gmail.com)
6*4882a593Smuzhiyun  * The code based on code and ideas from:
7*4882a593Smuzhiyun  * Eric Dumazet (eric.dumazet@gmail.com)
8*4882a593Smuzhiyun  * and from:
9*4882a593Smuzhiyun  * Shubham Bansal <illusionist.neo@gmail.com>
10*4882a593Smuzhiyun  */
11*4882a593Smuzhiyun 
12*4882a593Smuzhiyun #include <linux/netdevice.h>
13*4882a593Smuzhiyun #include <linux/filter.h>
14*4882a593Smuzhiyun #include <linux/if_vlan.h>
15*4882a593Smuzhiyun #include <asm/cacheflush.h>
16*4882a593Smuzhiyun #include <asm/set_memory.h>
17*4882a593Smuzhiyun #include <asm/nospec-branch.h>
18*4882a593Smuzhiyun #include <asm/asm-prototypes.h>
19*4882a593Smuzhiyun #include <linux/bpf.h>
20*4882a593Smuzhiyun 
21*4882a593Smuzhiyun /*
22*4882a593Smuzhiyun  * eBPF prog stack layout:
23*4882a593Smuzhiyun  *
24*4882a593Smuzhiyun  *                         high
25*4882a593Smuzhiyun  * original ESP =>        +-----+
26*4882a593Smuzhiyun  *                        |     | callee saved registers
27*4882a593Smuzhiyun  *                        +-----+
28*4882a593Smuzhiyun  *                        | ... | eBPF JIT scratch space
29*4882a593Smuzhiyun  * BPF_FP,IA32_EBP  =>    +-----+
30*4882a593Smuzhiyun  *                        | ... | eBPF prog stack
31*4882a593Smuzhiyun  *                        +-----+
32*4882a593Smuzhiyun  *                        |RSVD | JIT scratchpad
33*4882a593Smuzhiyun  * current ESP =>         +-----+
34*4882a593Smuzhiyun  *                        |     |
35*4882a593Smuzhiyun  *                        | ... | Function call stack
36*4882a593Smuzhiyun  *                        |     |
37*4882a593Smuzhiyun  *                        +-----+
38*4882a593Smuzhiyun  *                          low
39*4882a593Smuzhiyun  *
40*4882a593Smuzhiyun  * The callee saved registers:
41*4882a593Smuzhiyun  *
42*4882a593Smuzhiyun  *                                high
43*4882a593Smuzhiyun  * original ESP =>        +------------------+ \
44*4882a593Smuzhiyun  *                        |        ebp       | |
45*4882a593Smuzhiyun  * current EBP =>         +------------------+ } callee saved registers
46*4882a593Smuzhiyun  *                        |    ebx,esi,edi   | |
47*4882a593Smuzhiyun  *                        +------------------+ /
48*4882a593Smuzhiyun  *                                low
49*4882a593Smuzhiyun  */
50*4882a593Smuzhiyun 
/*
 * Append an instruction fragment of @len bytes (the low bytes of
 * @bytes) at @ptr and return the advanced write pointer.
 * Only lengths 1, 2 and 4 are ever emitted by this JIT.
 */
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	switch (len) {
	case 1:
		*ptr = bytes;
		break;
	case 2:
		*(u16 *)ptr = bytes;
		break;
	default:
		*(u32 *)ptr = bytes;
		barrier();
		break;
	}
	return ptr + len;
}
63*4882a593Smuzhiyun 
64*4882a593Smuzhiyun #define EMIT(bytes, len) \
65*4882a593Smuzhiyun 	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)
66*4882a593Smuzhiyun 
67*4882a593Smuzhiyun #define EMIT1(b1)		EMIT(b1, 1)
68*4882a593Smuzhiyun #define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
69*4882a593Smuzhiyun #define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
70*4882a593Smuzhiyun #define EMIT4(b1, b2, b3, b4)   \
71*4882a593Smuzhiyun 	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
72*4882a593Smuzhiyun 
73*4882a593Smuzhiyun #define EMIT1_off32(b1, off) \
74*4882a593Smuzhiyun 	do { EMIT1(b1); EMIT(off, 4); } while (0)
75*4882a593Smuzhiyun #define EMIT2_off32(b1, b2, off) \
76*4882a593Smuzhiyun 	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
77*4882a593Smuzhiyun #define EMIT3_off32(b1, b2, b3, off) \
78*4882a593Smuzhiyun 	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
79*4882a593Smuzhiyun #define EMIT4_off32(b1, b2, b3, b4, off) \
80*4882a593Smuzhiyun 	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)
81*4882a593Smuzhiyun 
82*4882a593Smuzhiyun #define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)
83*4882a593Smuzhiyun 
/* Can @value be encoded as a sign-extended 8-bit immediate? */
static bool is_imm8(int value)
{
	return value >= -128 && value <= 127;
}
88*4882a593Smuzhiyun 
/* Can @value be encoded as a sign-extended 32-bit immediate? */
static bool is_simm32(s64 value)
{
	s64 truncated = (s64)(s32)value;

	return truncated == value;
}
93*4882a593Smuzhiyun 
94*4882a593Smuzhiyun #define STACK_OFFSET(k)	(k)
95*4882a593Smuzhiyun #define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */
96*4882a593Smuzhiyun 
97*4882a593Smuzhiyun #define IA32_EAX	(0x0)
98*4882a593Smuzhiyun #define IA32_EBX	(0x3)
99*4882a593Smuzhiyun #define IA32_ECX	(0x1)
100*4882a593Smuzhiyun #define IA32_EDX	(0x2)
101*4882a593Smuzhiyun #define IA32_ESI	(0x6)
102*4882a593Smuzhiyun #define IA32_EDI	(0x7)
103*4882a593Smuzhiyun #define IA32_EBP	(0x5)
104*4882a593Smuzhiyun #define IA32_ESP	(0x4)
105*4882a593Smuzhiyun 
106*4882a593Smuzhiyun /*
107*4882a593Smuzhiyun  * List of x86 cond jumps opcodes (. + s8)
108*4882a593Smuzhiyun  * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32)
109*4882a593Smuzhiyun  */
110*4882a593Smuzhiyun #define IA32_JB  0x72
111*4882a593Smuzhiyun #define IA32_JAE 0x73
112*4882a593Smuzhiyun #define IA32_JE  0x74
113*4882a593Smuzhiyun #define IA32_JNE 0x75
114*4882a593Smuzhiyun #define IA32_JBE 0x76
115*4882a593Smuzhiyun #define IA32_JA  0x77
116*4882a593Smuzhiyun #define IA32_JL  0x7C
117*4882a593Smuzhiyun #define IA32_JGE 0x7D
118*4882a593Smuzhiyun #define IA32_JLE 0x7E
119*4882a593Smuzhiyun #define IA32_JG  0x7F
120*4882a593Smuzhiyun 
121*4882a593Smuzhiyun #define COND_JMP_OPCODE_INVALID	(0xFF)
122*4882a593Smuzhiyun 
123*4882a593Smuzhiyun /*
124*4882a593Smuzhiyun  * Map eBPF registers to IA32 32bit registers or stack scratch space.
125*4882a593Smuzhiyun  *
126*4882a593Smuzhiyun  * 1. All the registers, R0-R10, are mapped to scratch space on stack.
127*4882a593Smuzhiyun  * 2. We need two 64 bit temp registers to do complex operations on eBPF
128*4882a593Smuzhiyun  *    registers.
129*4882a593Smuzhiyun  * 3. For performance reason, the BPF_REG_AX for blinding constant, is
130*4882a593Smuzhiyun  *    mapped to real hardware register pair, IA32_ESI and IA32_EDI.
131*4882a593Smuzhiyun  *
132*4882a593Smuzhiyun  * As the eBPF registers are all 64 bit registers and IA32 has only 32 bit
133*4882a593Smuzhiyun  * registers, we have to map each eBPF registers with two IA32 32 bit regs
134*4882a593Smuzhiyun  * or scratch memory space and we have to build eBPF 64 bit register from those.
135*4882a593Smuzhiyun  *
136*4882a593Smuzhiyun  * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
137*4882a593Smuzhiyun  */
/*
 * bpf2ia32[reg][0] is the low 32 bits and [1] the high 32 bits of the
 * mapping: each entry is either an offset into the stack scratch area
 * (interpreted via STACK_OFFSET/STACK_VAR relative to IA32_EBP) or, for
 * BPF_REG_AX only, a real IA32 register number.
 */
static const u8 bpf2ia32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
166*4882a593Smuzhiyun 
167*4882a593Smuzhiyun #define dst_lo	dst[0]
168*4882a593Smuzhiyun #define dst_hi	dst[1]
169*4882a593Smuzhiyun #define src_lo	src[0]
170*4882a593Smuzhiyun #define src_hi	src[1]
171*4882a593Smuzhiyun 
172*4882a593Smuzhiyun #define STACK_ALIGNMENT	8
173*4882a593Smuzhiyun /*
174*4882a593Smuzhiyun  * Stack space for BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4,
175*4882a593Smuzhiyun  * BPF_REG_5, BPF_REG_6, BPF_REG_7, BPF_REG_8, BPF_REG_9,
176*4882a593Smuzhiyun  * BPF_REG_FP, BPF_REG_AX and Tail call counts.
177*4882a593Smuzhiyun  */
178*4882a593Smuzhiyun #define SCRATCH_SIZE 96
179*4882a593Smuzhiyun 
180*4882a593Smuzhiyun /* Total stack size used in JITed code */
181*4882a593Smuzhiyun #define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)
182*4882a593Smuzhiyun 
183*4882a593Smuzhiyun #define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)
184*4882a593Smuzhiyun 
185*4882a593Smuzhiyun /* Get the offset of eBPF REGISTERs stored on scratch space. */
186*4882a593Smuzhiyun #define STACK_VAR(off) (off)
187*4882a593Smuzhiyun 
188*4882a593Smuzhiyun /* Encode 'dst_reg' register into IA32 opcode 'byte' */
add_1reg(u8 byte,u32 dst_reg)189*4882a593Smuzhiyun static u8 add_1reg(u8 byte, u32 dst_reg)
190*4882a593Smuzhiyun {
191*4882a593Smuzhiyun 	return byte + dst_reg;
192*4882a593Smuzhiyun }
193*4882a593Smuzhiyun 
194*4882a593Smuzhiyun /* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
add_2reg(u8 byte,u32 dst_reg,u32 src_reg)195*4882a593Smuzhiyun static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
196*4882a593Smuzhiyun {
197*4882a593Smuzhiyun 	return byte + dst_reg + (src_reg << 3);
198*4882a593Smuzhiyun }
199*4882a593Smuzhiyun 
/*
 * Pad unused JIT image space with int3 (0xcc) so any stray execution
 * of the padding traps immediately instead of running arbitrary bytes.
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	unsigned char *p = area;

	while (size--)
		*p++ = 0xcc;
}
205*4882a593Smuzhiyun 
/*
 * Emit "dst = imm32" for one 32-bit half of an eBPF register.
 * @dst:  destination; a scratch-space offset when @dstk, else an IA32 reg
 * @val:  32-bit immediate to store
 * @dstk: destination lives in the stack scratch area ([ebp+off])
 * Zero is materialized with xor (shorter than mov imm32).
 */
static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;	/* byte counter consumed by the EMIT*() macros */

	if (dstk) {
		if (val == 0) {
			/* xor eax,eax */
			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		} else {
			/* mov dword ptr [ebp+off],imm32 */
			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
				    STACK_VAR(dst), val);
		}
	} else {
		if (val == 0)
			/* xor dst,dst */
			EMIT2(0x33, add_2reg(0xC0, dst, dst));
		else
			/* mov dst,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
				    val);
	}
	*pprog = prog;
}
232*4882a593Smuzhiyun 
/*
 * Emit "dst = src" for one 32-bit half; either side may be an IA32
 * register or a stack scratch slot ([ebp+off]).
 * (The old comment here, "dst = imm (4 bytes)", described
 * emit_ia32_mov_i, not this function.)
 */
static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;	/* EAX carries src when spilled */

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
	if (dstk)
		/* mov dword ptr [ebp+off],sreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
	else
		/* mov dst,sreg */
		EMIT2(0x89, add_2reg(0xC0, dst, sreg));

	*pprog = prog;
}
253*4882a593Smuzhiyun 
254*4882a593Smuzhiyun /* dst = src */
emit_ia32_mov_r64(const bool is64,const u8 dst[],const u8 src[],bool dstk,bool sstk,u8 ** pprog,const struct bpf_prog_aux * aux)255*4882a593Smuzhiyun static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
256*4882a593Smuzhiyun 				     const u8 src[], bool dstk,
257*4882a593Smuzhiyun 				     bool sstk, u8 **pprog,
258*4882a593Smuzhiyun 				     const struct bpf_prog_aux *aux)
259*4882a593Smuzhiyun {
260*4882a593Smuzhiyun 	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
261*4882a593Smuzhiyun 	if (is64)
262*4882a593Smuzhiyun 		/* complete 8 byte move */
263*4882a593Smuzhiyun 		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
264*4882a593Smuzhiyun 	else if (!aux->verifier_zext)
265*4882a593Smuzhiyun 		/* zero out high 4 bytes */
266*4882a593Smuzhiyun 		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
267*4882a593Smuzhiyun }
268*4882a593Smuzhiyun 
269*4882a593Smuzhiyun /* Sign extended move */
emit_ia32_mov_i64(const bool is64,const u8 dst[],const u32 val,bool dstk,u8 ** pprog)270*4882a593Smuzhiyun static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
271*4882a593Smuzhiyun 				     const u32 val, bool dstk, u8 **pprog)
272*4882a593Smuzhiyun {
273*4882a593Smuzhiyun 	u32 hi = 0;
274*4882a593Smuzhiyun 
275*4882a593Smuzhiyun 	if (is64 && (val & (1<<31)))
276*4882a593Smuzhiyun 		hi = (u32)~0;
277*4882a593Smuzhiyun 	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
278*4882a593Smuzhiyun 	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
279*4882a593Smuzhiyun }
280*4882a593Smuzhiyun 
/*
 * ALU operation (32 bit)
 * dst = dst * src
 *
 * Uses the one-operand MUL, so dst is staged through EAX; the high
 * 32 bits of the product (left in EDX by MUL) are discarded.
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;	/* ECX carries src when spilled */

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));


	/* mul sreg: EDX:EAX = EAX * sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}
316*4882a593Smuzhiyun 
/*
 * Emit BPF_END/BPF_TO_LE on a 64-bit dst: keep the low @val (16/32/64)
 * bits and zero the rest. IA32 is little-endian, so no byte swap is
 * needed — only truncation/zero-extension.
 */
static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
					 bool dstk, u8 **pprog,
					 const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	/* val == 64 is a pure nop: skip the stack load/store round trip. */
	if (dstk && val != 64) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
364*4882a593Smuzhiyun 
/*
 * Emit BPF_END/BPF_TO_BE on a 64-bit dst: byte-swap the low @val
 * (16/32/64) bits into big-endian order and zero-extend the rest.
 * For val == 64 the two 32-bit halves are bswapped individually and
 * then exchanged.
 */
static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog,
				       const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		/* movzwl dreg_lo,dreg_lo: zero-extend the 16-bit result */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Emit 'bswap' on the low half */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap' on the high half */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* Exchange the two swapped halves through ecx: */
		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
430*4882a593Smuzhiyun 
/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 *
 * Unsigned 32-bit DIV: dividend is EDX:EAX (EDX zeroed first), divisor
 * in ECX. Quotient ends up in EAX (BPF_DIV), remainder in EDX (BPF_MOD).
 * NOTE(review): no explicit divide-by-zero guard is emitted here —
 * presumably the verifier/caller guarantees a non-zero divisor; confirm
 * before reusing this helper.
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		/* BPF_MOD: the remainder is in EDX */
		if (dstk)
			/* mov dword ptr [ebp+off],edx */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			/* mov dst,edx */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		/* BPF_DIV: the quotient is in EAX */
		if (dstk)
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			/* mov dst,eax */
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}
477*4882a593Smuzhiyun 
/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 *
 * Variable shifts take their count from CL, so src is moved to ECX
 * first when it is not already there.
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;	/* EAX carries dst when spilled */
	u8 b2;					/* ModRM byte selecting shl/shr/sar */

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;	/* shl */
	case BPF_RSH:
		b2 = 0xE8; break;	/* shr */
	case BPF_ARSH:
		b2 = 0xF8; break;	/* sar */
	default:
		/* unknown shift op: emit nothing, *pprog stays unchanged */
		return;
	}
	/* shl/shr/sar dreg,cl */
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}
518*4882a593Smuzhiyun 
/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 *
 * Emits one 32-bit half of a (possibly 64-bit) ALU op. When @hi and
 * @is64 are both set this is the high half of a 64-bit add/sub, so
 * the carry-propagating forms ADC/SBB are used instead of ADD/SUB.
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;	/* EAX carries src when spilled */
	u8 dreg = dstk ? IA32_EDX : dst;	/* EDX carries dst when spilled */

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			/* adc dreg,sreg: add with the low half's carry */
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			/* add dreg,sreg */
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			/* sbb dreg,sreg: subtract with the low half's borrow */
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			/* sub dreg,sreg */
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
575*4882a593Smuzhiyun 
576*4882a593Smuzhiyun /* ALU operation (64 bit) */
emit_ia32_alu_r64(const bool is64,const u8 op,const u8 dst[],const u8 src[],bool dstk,bool sstk,u8 ** pprog,const struct bpf_prog_aux * aux)577*4882a593Smuzhiyun static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
578*4882a593Smuzhiyun 				     const u8 dst[], const u8 src[],
579*4882a593Smuzhiyun 				     bool dstk,  bool sstk,
580*4882a593Smuzhiyun 				     u8 **pprog, const struct bpf_prog_aux *aux)
581*4882a593Smuzhiyun {
582*4882a593Smuzhiyun 	u8 *prog = *pprog;
583*4882a593Smuzhiyun 
584*4882a593Smuzhiyun 	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
585*4882a593Smuzhiyun 	if (is64)
586*4882a593Smuzhiyun 		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
587*4882a593Smuzhiyun 				&prog);
588*4882a593Smuzhiyun 	else if (!aux->verifier_zext)
589*4882a593Smuzhiyun 		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
590*4882a593Smuzhiyun 	*pprog = prog;
591*4882a593Smuzhiyun }
592*4882a593Smuzhiyun 
/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 *
 * Immediate counterpart of emit_ia32_alu_r. Immediates that fit in a
 * signed byte use the short 0x83 /digit encodings; otherwise @val is
 * preloaded into EDX and the register-register opcode is emitted.
 * As in emit_ia32_alu_r, the high half of a 64-bit add/sub uses the
 * carry forms ADC/SBB.
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;	/* EAX carries dst when spilled */
	u8 sreg = IA32_EDX;			/* staging reg for wide immediates */

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32*/
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				/* adc dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				/* adc dreg,edx */
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* add dreg,imm8 */
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				/* add dreg,edx */
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				/* sbb dreg,imm8 */
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				/* sbb dreg,edx */
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				/* sub dreg,imm8 */
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				/* sub dreg,edx */
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			/* or dreg,imm8 */
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			/* or dreg,edx */
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			/* and dreg,imm8 */
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			/* and dreg,edx */
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			/* xor dreg,imm8 */
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			/* xor dreg,edx */
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	case BPF_NEG:
		/* neg dreg: dst = -dst (@val is unused here) */
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
675*4882a593Smuzhiyun 
676*4882a593Smuzhiyun /* ALU operation (64 bit) */
emit_ia32_alu_i64(const bool is64,const u8 op,const u8 dst[],const u32 val,bool dstk,u8 ** pprog,const struct bpf_prog_aux * aux)677*4882a593Smuzhiyun static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
678*4882a593Smuzhiyun 				     const u8 dst[], const u32 val,
679*4882a593Smuzhiyun 				     bool dstk, u8 **pprog,
680*4882a593Smuzhiyun 				     const struct bpf_prog_aux *aux)
681*4882a593Smuzhiyun {
682*4882a593Smuzhiyun 	u8 *prog = *pprog;
683*4882a593Smuzhiyun 	u32 hi = 0;
684*4882a593Smuzhiyun 
685*4882a593Smuzhiyun 	if (is64 && (val & (1<<31)))
686*4882a593Smuzhiyun 		hi = (u32)~0;
687*4882a593Smuzhiyun 
688*4882a593Smuzhiyun 	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
689*4882a593Smuzhiyun 	if (is64)
690*4882a593Smuzhiyun 		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
691*4882a593Smuzhiyun 	else if (!aux->verifier_zext)
692*4882a593Smuzhiyun 		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
693*4882a593Smuzhiyun 
694*4882a593Smuzhiyun 	*pprog = prog;
695*4882a593Smuzhiyun }
696*4882a593Smuzhiyun 
/*
 * dst = -dst (64 bit)
 *
 * Two's-complement negate of the 64-bit register pair, using the
 * classic neg/adc/neg idiom: lo = -lo (CF set when lo != 0),
 * hi = -(hi + CF).
 */
static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* neg dreg_lo; sets CF when the low word was non-zero */
	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
	/* adc dreg_hi,0x0; fold the borrow into the high word */
	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
	/* neg dreg_hi */
	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
729*4882a593Smuzhiyun 
/*
 * dst = dst << src
 *
 * 64-bit left shift by a runtime count held in src_lo.  The hardware
 * shld/shl pair only honours the low 5 bits of CL, so a compare/branch
 * fixup afterwards handles counts in [32, 63] by moving the low word
 * into the high word and clearing the low word.
 */
static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shld dreg_hi,dreg_lo,cl */
	EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* shl dreg_lo,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));

	/* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_hi,dreg_lo */
	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* xor dreg_lo,dreg_lo */
	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}
782*4882a593Smuzhiyun 
/*
 * dst = dst >> src (signed)
 *
 * 64-bit arithmetic right shift by a runtime count in src_lo.  shrd/sar
 * only honour the low 5 bits of CL, so counts in [32, 63] are fixed up
 * afterwards: the high word moves into the low word and the high word
 * is filled with the sign bit (sar by 31).
 */
static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
				      bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));

	/* if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi depending on sign */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (5 bytes) when < 32 */
	EMIT2(IA32_JB, 5);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,31; replicate the sign bit across the high word */
	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}
835*4882a593Smuzhiyun 
/*
 * dst = dst >> src
 *
 * 64-bit logical right shift by a runtime count in src_lo.  shrd/shr
 * only honour the low 5 bits of CL, so counts in [32, 63] are fixed up
 * afterwards: the high word moves into the low word and the high word
 * is cleared.
 */
static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* shr dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));

	/* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* xor dreg_hi,dreg_hi */
	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}
888*4882a593Smuzhiyun 
/*
 * dst = dst << val
 *
 * 64-bit left shift by a compile-time constant.  The three cases are
 * resolved at JIT time: val < 32 uses shld/shl, 32 <= val < 64 moves
 * the shifted low word into the high word, and val >= 64 yields zero.
 */
static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do LSH operation */
	if (val < 32) {
		/* shld dreg_hi,dreg_lo,imm8 */
		EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
	} else {
		/* Shift count >= 64: result is zero */
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
936*4882a593Smuzhiyun 
/*
 * dst = dst >> val
 *
 * 64-bit logical right shift by a compile-time constant.  val < 32 uses
 * shrd/shr, 32 <= val < 64 moves the shifted high word into the low
 * word and clears the high word, and val >= 64 yields zero.
 */
static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* Do RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	} else {
		/* Shift count >= 64: result is zero */
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
985*4882a593Smuzhiyun 
/*
 * dst = dst >> val (signed)
 *
 * 64-bit arithmetic right shift by a compile-time constant.  val < 32
 * uses shrd/sar, 32 <= val < 64 moves the shifted high word into the
 * low word and sign-fills the high word, and val >= 64 fills both
 * words with the sign bit.
 */
static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
				      bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	/* Operate on EAX:EDX scratch when dst lives on the stack */
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* ashr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));

		/* ashr dreg_hi,31; fill the high word with the sign bit */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
	} else {
		/* Shift count >= 64: both words become the sign bit */
		/* ashr dreg_hi,31 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
1034*4882a593Smuzhiyun 
/*
 * dst = dst * src (64 bit, low 64 bits kept)
 *
 * Schoolbook 64x64 multiply from three 32-bit MULs:
 *   result_hi = dst_hi*src_lo + dst_lo*src_hi + high32(dst_lo*src_lo)
 *   result_lo = low32(dst_lo*src_lo)
 * ECX accumulates the high word; MUL leaves its product in EDX:EAX.
 */
static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_hi));
	else
		/* mov eax,dst_hi */
		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* mov ecx,eax; ecx = low32(dst_hi * src_lo) */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
	else
		/* mul src_hi */
		EMIT2(0xF7, add_1reg(0xE0, src_hi));

	/* add ecx,eax; ecx += low32(dst_lo * src_hi) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* add ecx,edx; ecx += high32(dst_lo * src_lo) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
1111*4882a593Smuzhiyun 
/*
 * dst = dst * val (64 bit, low 64 bits kept)
 *
 * Immediate-operand variant of the 64x64 multiply: the 32-bit
 * immediate is sign-extended to 64 bits (hi = sign fill), then the
 * same three-MUL schoolbook scheme as emit_ia32_mul_r64 is used with
 * ECX accumulating the high word.
 */
static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u32 hi;

	/* Sign-extend the immediate into its high word */
	hi = val & (1<<31) ? (u32)~0 : 0;
	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
	else
		/* mul dst_hi */
		EMIT2(0xF7, add_1reg(0xE0, dst_hi));

	/* mov ecx,eax; ecx = low32(dst_hi * val) */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
	/* add ecx,eax; ecx += low32(dst_lo * hi) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* movl eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));

	/* add ecx,edx; ecx += high32(dst_lo * val) */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
1171*4882a593Smuzhiyun 
bpf_size_to_x86_bytes(int bpf_size)1172*4882a593Smuzhiyun static int bpf_size_to_x86_bytes(int bpf_size)
1173*4882a593Smuzhiyun {
1174*4882a593Smuzhiyun 	if (bpf_size == BPF_W)
1175*4882a593Smuzhiyun 		return 4;
1176*4882a593Smuzhiyun 	else if (bpf_size == BPF_H)
1177*4882a593Smuzhiyun 		return 2;
1178*4882a593Smuzhiyun 	else if (bpf_size == BPF_B)
1179*4882a593Smuzhiyun 		return 1;
1180*4882a593Smuzhiyun 	else if (bpf_size == BPF_DW)
1181*4882a593Smuzhiyun 		return 4; /* imm32 */
1182*4882a593Smuzhiyun 	else
1183*4882a593Smuzhiyun 		return 0;
1184*4882a593Smuzhiyun }
1185*4882a593Smuzhiyun 
/* Per-program state carried between JIT passes */
struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

/* Exact prologue length in bytes; bpf_tail_call jumps past it */
#define PROLOGUE_SIZE 35
1195*4882a593Smuzhiyun 
/*
 * Emit prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program.
 *
 * Saves the callee-saved registers, carves out the scratch + BPF stack
 * area, publishes BPF_REG_FP and BPF_REG_1 (ctx, arriving in EAX) into
 * their stack slots and zeroes the tail-call counter.  The emitted
 * byte count is asserted against PROLOGUE_SIZE at build time.
 */
static void emit_prologue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];
	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
	const u8 *tcc = bpf2ia32[TCALL_CNT];

	/* push ebp */
	EMIT1(0x55);
	/* mov ebp,esp */
	EMIT2(0x89, 0xE5);
	/* push edi */
	EMIT1(0x57);
	/* push esi */
	EMIT1(0x56);
	/* push ebx */
	EMIT1(0x53);

	/* sub esp,STACK_SIZE */
	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
	/* sub ebp,SCRATCH_SIZE+12*/
	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
	/* xor ebx,ebx; EBX is the zero used for all high-word stores below */
	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));

	/* Set up BPF prog stack base register */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));

	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
	/* mov dword ptr [ebp+off],eax */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]))	;

	/* Initialize Tail Count */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	*pprog = prog;
}
1243*4882a593Smuzhiyun 
/*
 * Emit epilogue code for BPF program.
 *
 * Loads BPF_REG_0 into EDX:EAX as the return value, undoes the
 * prologue's EBP adjustment, restores the callee-saved registers from
 * their pushed slots and returns.
 */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	const u8 *r0 = bpf2ia32[BPF_REG_0];
	int cnt = 0;

	/* mov eax,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
	/* mov edx,dword ptr [ebp+off]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

	/* add ebp,SCRATCH_SIZE+12; undo the prologue's "sub ebp" */
	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);

	/* mov ebx,dword ptr [ebp-12]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
	/* mov esi,dword ptr [ebp-8]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
	/* mov edi,dword ptr [ebp-4]*/
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

	EMIT1(0xC9); /* leave */
	EMIT1(0xC3); /* ret */
	*pprog = prog;
}
1270*4882a593Smuzhiyun 
/*
 * Emit an indirect jump through EDX and return the number of bytes
 * emitted.  Under CONFIG_RETPOLINE the jump is routed through the
 * __x86_indirect_thunk_edx retpoline (relative jmp from @ip) instead
 * of a raw "jmp *%edx", to mitigate Spectre v2.
 */
static int emit_jmp_edx(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;

#ifdef CONFIG_RETPOLINE
	/* jmp __x86_indirect_thunk_edx (rel32 from the end of this insn) */
	EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
	/* jmp *%edx */
	EMIT2(0xFF, 0xE2);
#endif
	*pprog = prog;

	return cnt;
}
1285*4882a593Smuzhiyun 
/*
 * Generate the following code:
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 *
 * NOTE(review): the emitted count check below is "jae" (i.e. bails out
 * when tail_call_cnt >= MAX_TAIL_CALL_CNT before the increment), which
 * is stricter than the ">" shown in this pseudo-code -- the pseudo-code
 * is approximate; confirm against the other BPF JITs if exact limit
 * semantics matter.
 */
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];	/* ctx */
	const u8 *r2 = bpf2ia32[BPF_REG_2];	/* array */
	const u8 *r3 = bpf2ia32[BPF_REG_3];	/* index */
	const u8 *tcc = bpf2ia32[TCALL_CNT];	/* tail-call counter slots */
	u32 lo, hi;
	/*
	 * Stack offset of the "out:" label, learned at the end of the first
	 * JIT pass and reused (via jmp_label()) on subsequent passes; static
	 * so the value survives across calls.
	 */
	static int jmp_label1 = -1;

	/*
	 * if (index >= array->map.max_entries)
	 *     goto out;
	 */
	/* mov eax,dword ptr [ebp+off]  -- eax = array */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
	/* mov edx,dword ptr [ebp+off]  -- edx = index (low 32 bits) */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));

	/* cmp dword ptr [eax+off],edx  -- max_entries vs index */
	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
	      offsetof(struct bpf_array, map.max_entries));
	/* jbe out  -- taken when max_entries <= index */
	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));

	/*
	 * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
	 *     goto out;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	/* Load the 64-bit counter into ecx (low) : ebx (high). */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* cmp ebx,hi */
	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
	/* jne <skip next cmp>  -- high words differ; decide on high compare */
	EMIT2(IA32_JNE, 3);
	/* cmp ecx,lo */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);

	/* jae out */
	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));

	/* tail_call_cnt++ (64-bit add with carry) */
	/* add ecx,0x1 */
	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
	/* adc ebx,0x0 */
	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);

	/* Store the incremented counter back to its stack slots. */
	/* mov dword ptr [ebp+off],ecx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	/* mov dword ptr [ebp+off],ebx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* prog = array->ptrs[index]; */
	/* mov edx, [eax + edx * 4 + offsetof(...)] */
	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *     goto out;
	 */
	/* test edx,edx */
	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* je out */
	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));

	/* goto *(prog->bpf_func + prologue_size); */
	/* mov edx, dword ptr [edx + offsetof(struct bpf_prog, bpf_func)] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
	      offsetof(struct bpf_prog, bpf_func));
	/* add edx,prologue_size  -- skip the target's prologue */
	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);

	/* mov eax,dword ptr [ebp+off]  -- reload ctx as 1st arg */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));

	/*
	 * Now we're ready to jump into next BPF program:
	 * eax == ctx (1st arg)
	 * edx == prog->bpf_func + prologue_size
	 */
	cnt += emit_jmp_edx(&prog, ip + cnt);

	/* First pass: record where "out:" lands so jmp_label() can use it. */
	if (jmp_label1 == -1)
		jmp_label1 = cnt;

	/* out: */
	*pprog = prog;
}
1389*4882a593Smuzhiyun 
/*
 * Push the scratch stack register on top of the stack.
 *
 * Pushes the 64-bit BPF register @src (a {lo, hi} pair of stack-slot
 * ids; src_lo/src_hi are presumably macros expanding to src[0]/src[1]
 * -- defined earlier in this file) onto the machine stack, high word
 * first so the value sits in little-endian dword order, via ECX as a
 * scratch register. Used to marshal arguments for BPF helper calls.
 */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;	/* consumed implicitly by the EMIT*() macros */

	/* mov ecx,dword ptr [ebp+off]  -- high 32 bits */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
	/* push ecx */
	EMIT1(0x51);

	/* mov ecx,dword ptr [ebp+off]  -- low 32 bits */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}
1408*4882a593Smuzhiyun 
/*
 * Map a BPF conditional-jump opcode to the matching IA32 Jcc opcode.
 *
 * @op:        BPF jump operation (BPF_JEQ, BPF_JGT, ...)
 * @is_cmp_lo: true when this jump follows the comparison of the LOW
 *             words of a 64-bit operand pair; in that case the signed
 *             conditions must use their unsigned counterparts, since
 *             the sign lives in the already-compared high words.
 *
 * Returns COND_JMP_OPCODE_INVALID for an unrecognized @op.
 */
static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
	switch (op) {
	case BPF_JEQ:
		return IA32_JE;
	case BPF_JSET:
	case BPF_JNE:
		return IA32_JNE;
	case BPF_JGT:
		/* GT is unsigned '>', JA in x86 */
		return IA32_JA;
	case BPF_JLT:
		/* LT is unsigned '<', JB in x86 */
		return IA32_JB;
	case BPF_JGE:
		/* GE is unsigned '>=', JAE in x86 */
		return IA32_JAE;
	case BPF_JLE:
		/* LE is unsigned '<=', JBE in x86 */
		return IA32_JBE;
	case BPF_JSGT:
		/* Signed '>' (JG), or unsigned JA for the low-word compare */
		return is_cmp_lo ? IA32_JA : IA32_JG;
	case BPF_JSLT:
		/* Signed '<' (JL), or unsigned JB for the low-word compare */
		return is_cmp_lo ? IA32_JB : IA32_JL;
	case BPF_JSGE:
		/* Signed '>=' (JGE), or unsigned JAE for the low-word compare */
		return is_cmp_lo ? IA32_JAE : IA32_JGE;
	case BPF_JSLE:
		/* Signed '<=' (JLE), or unsigned JBE for the low-word compare */
		return is_cmp_lo ? IA32_JBE : IA32_JLE;
	}

	return COND_JMP_OPCODE_INVALID;
}
1477*4882a593Smuzhiyun 
do_jit(struct bpf_prog * bpf_prog,int * addrs,u8 * image,int oldproglen,struct jit_context * ctx)1478*4882a593Smuzhiyun static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
1479*4882a593Smuzhiyun 		  int oldproglen, struct jit_context *ctx)
1480*4882a593Smuzhiyun {
1481*4882a593Smuzhiyun 	struct bpf_insn *insn = bpf_prog->insnsi;
1482*4882a593Smuzhiyun 	int insn_cnt = bpf_prog->len;
1483*4882a593Smuzhiyun 	bool seen_exit = false;
1484*4882a593Smuzhiyun 	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
1485*4882a593Smuzhiyun 	int i, cnt = 0;
1486*4882a593Smuzhiyun 	int proglen = 0;
1487*4882a593Smuzhiyun 	u8 *prog = temp;
1488*4882a593Smuzhiyun 
1489*4882a593Smuzhiyun 	emit_prologue(&prog, bpf_prog->aux->stack_depth);
1490*4882a593Smuzhiyun 
1491*4882a593Smuzhiyun 	for (i = 0; i < insn_cnt; i++, insn++) {
1492*4882a593Smuzhiyun 		const s32 imm32 = insn->imm;
1493*4882a593Smuzhiyun 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
1494*4882a593Smuzhiyun 		const bool dstk = insn->dst_reg != BPF_REG_AX;
1495*4882a593Smuzhiyun 		const bool sstk = insn->src_reg != BPF_REG_AX;
1496*4882a593Smuzhiyun 		const u8 code = insn->code;
1497*4882a593Smuzhiyun 		const u8 *dst = bpf2ia32[insn->dst_reg];
1498*4882a593Smuzhiyun 		const u8 *src = bpf2ia32[insn->src_reg];
1499*4882a593Smuzhiyun 		const u8 *r0 = bpf2ia32[BPF_REG_0];
1500*4882a593Smuzhiyun 		s64 jmp_offset;
1501*4882a593Smuzhiyun 		u8 jmp_cond;
1502*4882a593Smuzhiyun 		int ilen;
1503*4882a593Smuzhiyun 		u8 *func;
1504*4882a593Smuzhiyun 
1505*4882a593Smuzhiyun 		switch (code) {
1506*4882a593Smuzhiyun 		/* ALU operations */
1507*4882a593Smuzhiyun 		/* dst = src */
1508*4882a593Smuzhiyun 		case BPF_ALU | BPF_MOV | BPF_K:
1509*4882a593Smuzhiyun 		case BPF_ALU | BPF_MOV | BPF_X:
1510*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MOV | BPF_K:
1511*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MOV | BPF_X:
1512*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1513*4882a593Smuzhiyun 			case BPF_X:
1514*4882a593Smuzhiyun 				if (imm32 == 1) {
1515*4882a593Smuzhiyun 					/* Special mov32 for zext. */
1516*4882a593Smuzhiyun 					emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1517*4882a593Smuzhiyun 					break;
1518*4882a593Smuzhiyun 				}
1519*4882a593Smuzhiyun 				emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
1520*4882a593Smuzhiyun 						  &prog, bpf_prog->aux);
1521*4882a593Smuzhiyun 				break;
1522*4882a593Smuzhiyun 			case BPF_K:
1523*4882a593Smuzhiyun 				/* Sign-extend immediate value to dst reg */
1524*4882a593Smuzhiyun 				emit_ia32_mov_i64(is64, dst, imm32,
1525*4882a593Smuzhiyun 						  dstk, &prog);
1526*4882a593Smuzhiyun 				break;
1527*4882a593Smuzhiyun 			}
1528*4882a593Smuzhiyun 			break;
1529*4882a593Smuzhiyun 		/* dst = dst + src/imm */
1530*4882a593Smuzhiyun 		/* dst = dst - src/imm */
1531*4882a593Smuzhiyun 		/* dst = dst | src/imm */
1532*4882a593Smuzhiyun 		/* dst = dst & src/imm */
1533*4882a593Smuzhiyun 		/* dst = dst ^ src/imm */
1534*4882a593Smuzhiyun 		/* dst = dst * src/imm */
1535*4882a593Smuzhiyun 		/* dst = dst << src */
1536*4882a593Smuzhiyun 		/* dst = dst >> src */
1537*4882a593Smuzhiyun 		case BPF_ALU | BPF_ADD | BPF_K:
1538*4882a593Smuzhiyun 		case BPF_ALU | BPF_ADD | BPF_X:
1539*4882a593Smuzhiyun 		case BPF_ALU | BPF_SUB | BPF_K:
1540*4882a593Smuzhiyun 		case BPF_ALU | BPF_SUB | BPF_X:
1541*4882a593Smuzhiyun 		case BPF_ALU | BPF_OR | BPF_K:
1542*4882a593Smuzhiyun 		case BPF_ALU | BPF_OR | BPF_X:
1543*4882a593Smuzhiyun 		case BPF_ALU | BPF_AND | BPF_K:
1544*4882a593Smuzhiyun 		case BPF_ALU | BPF_AND | BPF_X:
1545*4882a593Smuzhiyun 		case BPF_ALU | BPF_XOR | BPF_K:
1546*4882a593Smuzhiyun 		case BPF_ALU | BPF_XOR | BPF_X:
1547*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_ADD | BPF_K:
1548*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_ADD | BPF_X:
1549*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_SUB | BPF_K:
1550*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_SUB | BPF_X:
1551*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_OR | BPF_K:
1552*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_OR | BPF_X:
1553*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_AND | BPF_K:
1554*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_AND | BPF_X:
1555*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_XOR | BPF_K:
1556*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_XOR | BPF_X:
1557*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1558*4882a593Smuzhiyun 			case BPF_X:
1559*4882a593Smuzhiyun 				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
1560*4882a593Smuzhiyun 						  src, dstk, sstk, &prog,
1561*4882a593Smuzhiyun 						  bpf_prog->aux);
1562*4882a593Smuzhiyun 				break;
1563*4882a593Smuzhiyun 			case BPF_K:
1564*4882a593Smuzhiyun 				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
1565*4882a593Smuzhiyun 						  imm32, dstk, &prog,
1566*4882a593Smuzhiyun 						  bpf_prog->aux);
1567*4882a593Smuzhiyun 				break;
1568*4882a593Smuzhiyun 			}
1569*4882a593Smuzhiyun 			break;
1570*4882a593Smuzhiyun 		case BPF_ALU | BPF_MUL | BPF_K:
1571*4882a593Smuzhiyun 		case BPF_ALU | BPF_MUL | BPF_X:
1572*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1573*4882a593Smuzhiyun 			case BPF_X:
1574*4882a593Smuzhiyun 				emit_ia32_mul_r(dst_lo, src_lo, dstk,
1575*4882a593Smuzhiyun 						sstk, &prog);
1576*4882a593Smuzhiyun 				break;
1577*4882a593Smuzhiyun 			case BPF_K:
1578*4882a593Smuzhiyun 				/* mov ecx,imm32*/
1579*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1580*4882a593Smuzhiyun 					    imm32);
1581*4882a593Smuzhiyun 				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
1582*4882a593Smuzhiyun 						false, &prog);
1583*4882a593Smuzhiyun 				break;
1584*4882a593Smuzhiyun 			}
1585*4882a593Smuzhiyun 			if (!bpf_prog->aux->verifier_zext)
1586*4882a593Smuzhiyun 				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1587*4882a593Smuzhiyun 			break;
1588*4882a593Smuzhiyun 		case BPF_ALU | BPF_LSH | BPF_X:
1589*4882a593Smuzhiyun 		case BPF_ALU | BPF_RSH | BPF_X:
1590*4882a593Smuzhiyun 		case BPF_ALU | BPF_ARSH | BPF_K:
1591*4882a593Smuzhiyun 		case BPF_ALU | BPF_ARSH | BPF_X:
1592*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1593*4882a593Smuzhiyun 			case BPF_X:
1594*4882a593Smuzhiyun 				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
1595*4882a593Smuzhiyun 						  dstk, sstk, &prog);
1596*4882a593Smuzhiyun 				break;
1597*4882a593Smuzhiyun 			case BPF_K:
1598*4882a593Smuzhiyun 				/* mov ecx,imm32*/
1599*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1600*4882a593Smuzhiyun 					    imm32);
1601*4882a593Smuzhiyun 				emit_ia32_shift_r(BPF_OP(code), dst_lo,
1602*4882a593Smuzhiyun 						  IA32_ECX, dstk, false,
1603*4882a593Smuzhiyun 						  &prog);
1604*4882a593Smuzhiyun 				break;
1605*4882a593Smuzhiyun 			}
1606*4882a593Smuzhiyun 			if (!bpf_prog->aux->verifier_zext)
1607*4882a593Smuzhiyun 				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1608*4882a593Smuzhiyun 			break;
1609*4882a593Smuzhiyun 		/* dst = dst / src(imm) */
1610*4882a593Smuzhiyun 		/* dst = dst % src(imm) */
1611*4882a593Smuzhiyun 		case BPF_ALU | BPF_DIV | BPF_K:
1612*4882a593Smuzhiyun 		case BPF_ALU | BPF_DIV | BPF_X:
1613*4882a593Smuzhiyun 		case BPF_ALU | BPF_MOD | BPF_K:
1614*4882a593Smuzhiyun 		case BPF_ALU | BPF_MOD | BPF_X:
1615*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1616*4882a593Smuzhiyun 			case BPF_X:
1617*4882a593Smuzhiyun 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1618*4882a593Smuzhiyun 						    src_lo, dstk, sstk, &prog);
1619*4882a593Smuzhiyun 				break;
1620*4882a593Smuzhiyun 			case BPF_K:
1621*4882a593Smuzhiyun 				/* mov ecx,imm32*/
1622*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
1623*4882a593Smuzhiyun 					    imm32);
1624*4882a593Smuzhiyun 				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
1625*4882a593Smuzhiyun 						    IA32_ECX, dstk, false,
1626*4882a593Smuzhiyun 						    &prog);
1627*4882a593Smuzhiyun 				break;
1628*4882a593Smuzhiyun 			}
1629*4882a593Smuzhiyun 			if (!bpf_prog->aux->verifier_zext)
1630*4882a593Smuzhiyun 				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1631*4882a593Smuzhiyun 			break;
1632*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_DIV | BPF_K:
1633*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_DIV | BPF_X:
1634*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MOD | BPF_K:
1635*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MOD | BPF_X:
1636*4882a593Smuzhiyun 			goto notyet;
1637*4882a593Smuzhiyun 		/* dst = dst >> imm */
1638*4882a593Smuzhiyun 		/* dst = dst << imm */
1639*4882a593Smuzhiyun 		case BPF_ALU | BPF_RSH | BPF_K:
1640*4882a593Smuzhiyun 		case BPF_ALU | BPF_LSH | BPF_K:
1641*4882a593Smuzhiyun 			if (unlikely(imm32 > 31))
1642*4882a593Smuzhiyun 				return -EINVAL;
1643*4882a593Smuzhiyun 			/* mov ecx,imm32*/
1644*4882a593Smuzhiyun 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
1645*4882a593Smuzhiyun 			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
1646*4882a593Smuzhiyun 					  false, &prog);
1647*4882a593Smuzhiyun 			if (!bpf_prog->aux->verifier_zext)
1648*4882a593Smuzhiyun 				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1649*4882a593Smuzhiyun 			break;
1650*4882a593Smuzhiyun 		/* dst = dst << imm */
1651*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_LSH | BPF_K:
1652*4882a593Smuzhiyun 			if (unlikely(imm32 > 63))
1653*4882a593Smuzhiyun 				return -EINVAL;
1654*4882a593Smuzhiyun 			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
1655*4882a593Smuzhiyun 			break;
1656*4882a593Smuzhiyun 		/* dst = dst >> imm */
1657*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_RSH | BPF_K:
1658*4882a593Smuzhiyun 			if (unlikely(imm32 > 63))
1659*4882a593Smuzhiyun 				return -EINVAL;
1660*4882a593Smuzhiyun 			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
1661*4882a593Smuzhiyun 			break;
1662*4882a593Smuzhiyun 		/* dst = dst << src */
1663*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_LSH | BPF_X:
1664*4882a593Smuzhiyun 			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
1665*4882a593Smuzhiyun 			break;
1666*4882a593Smuzhiyun 		/* dst = dst >> src */
1667*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_RSH | BPF_X:
1668*4882a593Smuzhiyun 			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
1669*4882a593Smuzhiyun 			break;
1670*4882a593Smuzhiyun 		/* dst = dst >> src (signed) */
1671*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_ARSH | BPF_X:
1672*4882a593Smuzhiyun 			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
1673*4882a593Smuzhiyun 			break;
1674*4882a593Smuzhiyun 		/* dst = dst >> imm (signed) */
1675*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_ARSH | BPF_K:
1676*4882a593Smuzhiyun 			if (unlikely(imm32 > 63))
1677*4882a593Smuzhiyun 				return -EINVAL;
1678*4882a593Smuzhiyun 			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
1679*4882a593Smuzhiyun 			break;
1680*4882a593Smuzhiyun 		/* dst = ~dst */
1681*4882a593Smuzhiyun 		case BPF_ALU | BPF_NEG:
1682*4882a593Smuzhiyun 			emit_ia32_alu_i(is64, false, BPF_OP(code),
1683*4882a593Smuzhiyun 					dst_lo, 0, dstk, &prog);
1684*4882a593Smuzhiyun 			if (!bpf_prog->aux->verifier_zext)
1685*4882a593Smuzhiyun 				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
1686*4882a593Smuzhiyun 			break;
1687*4882a593Smuzhiyun 		/* dst = ~dst (64 bit) */
1688*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_NEG:
1689*4882a593Smuzhiyun 			emit_ia32_neg64(dst, dstk, &prog);
1690*4882a593Smuzhiyun 			break;
1691*4882a593Smuzhiyun 		/* dst = dst * src/imm */
1692*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MUL | BPF_X:
1693*4882a593Smuzhiyun 		case BPF_ALU64 | BPF_MUL | BPF_K:
1694*4882a593Smuzhiyun 			switch (BPF_SRC(code)) {
1695*4882a593Smuzhiyun 			case BPF_X:
1696*4882a593Smuzhiyun 				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
1697*4882a593Smuzhiyun 				break;
1698*4882a593Smuzhiyun 			case BPF_K:
1699*4882a593Smuzhiyun 				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
1700*4882a593Smuzhiyun 				break;
1701*4882a593Smuzhiyun 			}
1702*4882a593Smuzhiyun 			break;
1703*4882a593Smuzhiyun 		/* dst = htole(dst) */
1704*4882a593Smuzhiyun 		case BPF_ALU | BPF_END | BPF_FROM_LE:
1705*4882a593Smuzhiyun 			emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
1706*4882a593Smuzhiyun 					    bpf_prog->aux);
1707*4882a593Smuzhiyun 			break;
1708*4882a593Smuzhiyun 		/* dst = htobe(dst) */
1709*4882a593Smuzhiyun 		case BPF_ALU | BPF_END | BPF_FROM_BE:
1710*4882a593Smuzhiyun 			emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
1711*4882a593Smuzhiyun 					    bpf_prog->aux);
1712*4882a593Smuzhiyun 			break;
1713*4882a593Smuzhiyun 		/* dst = imm64 */
1714*4882a593Smuzhiyun 		case BPF_LD | BPF_IMM | BPF_DW: {
1715*4882a593Smuzhiyun 			s32 hi, lo = imm32;
1716*4882a593Smuzhiyun 
1717*4882a593Smuzhiyun 			hi = insn[1].imm;
1718*4882a593Smuzhiyun 			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
1719*4882a593Smuzhiyun 			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
1720*4882a593Smuzhiyun 			insn++;
1721*4882a593Smuzhiyun 			i++;
1722*4882a593Smuzhiyun 			break;
1723*4882a593Smuzhiyun 		}
1724*4882a593Smuzhiyun 		/* speculation barrier */
1725*4882a593Smuzhiyun 		case BPF_ST | BPF_NOSPEC:
1726*4882a593Smuzhiyun 			if (boot_cpu_has(X86_FEATURE_XMM2))
1727*4882a593Smuzhiyun 				/* Emit 'lfence' */
1728*4882a593Smuzhiyun 				EMIT3(0x0F, 0xAE, 0xE8);
1729*4882a593Smuzhiyun 			break;
1730*4882a593Smuzhiyun 		/* ST: *(u8*)(dst_reg + off) = imm */
1731*4882a593Smuzhiyun 		case BPF_ST | BPF_MEM | BPF_H:
1732*4882a593Smuzhiyun 		case BPF_ST | BPF_MEM | BPF_B:
1733*4882a593Smuzhiyun 		case BPF_ST | BPF_MEM | BPF_W:
1734*4882a593Smuzhiyun 		case BPF_ST | BPF_MEM | BPF_DW:
1735*4882a593Smuzhiyun 			if (dstk)
1736*4882a593Smuzhiyun 				/* mov eax,dword ptr [ebp+off] */
1737*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1738*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
1739*4882a593Smuzhiyun 			else
1740*4882a593Smuzhiyun 				/* mov eax,dst_lo */
1741*4882a593Smuzhiyun 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1742*4882a593Smuzhiyun 
1743*4882a593Smuzhiyun 			switch (BPF_SIZE(code)) {
1744*4882a593Smuzhiyun 			case BPF_B:
1745*4882a593Smuzhiyun 				EMIT(0xC6, 1); break;
1746*4882a593Smuzhiyun 			case BPF_H:
1747*4882a593Smuzhiyun 				EMIT2(0x66, 0xC7); break;
1748*4882a593Smuzhiyun 			case BPF_W:
1749*4882a593Smuzhiyun 			case BPF_DW:
1750*4882a593Smuzhiyun 				EMIT(0xC7, 1); break;
1751*4882a593Smuzhiyun 			}
1752*4882a593Smuzhiyun 
1753*4882a593Smuzhiyun 			if (is_imm8(insn->off))
1754*4882a593Smuzhiyun 				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
1755*4882a593Smuzhiyun 			else
1756*4882a593Smuzhiyun 				EMIT1_off32(add_1reg(0x80, IA32_EAX),
1757*4882a593Smuzhiyun 					    insn->off);
1758*4882a593Smuzhiyun 			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));
1759*4882a593Smuzhiyun 
1760*4882a593Smuzhiyun 			if (BPF_SIZE(code) == BPF_DW) {
1761*4882a593Smuzhiyun 				u32 hi;
1762*4882a593Smuzhiyun 
1763*4882a593Smuzhiyun 				hi = imm32 & (1<<31) ? (u32)~0 : 0;
1764*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
1765*4882a593Smuzhiyun 					    insn->off + 4);
1766*4882a593Smuzhiyun 				EMIT(hi, 4);
1767*4882a593Smuzhiyun 			}
1768*4882a593Smuzhiyun 			break;
1769*4882a593Smuzhiyun 
1770*4882a593Smuzhiyun 		/* STX: *(u8*)(dst_reg + off) = src_reg */
1771*4882a593Smuzhiyun 		case BPF_STX | BPF_MEM | BPF_B:
1772*4882a593Smuzhiyun 		case BPF_STX | BPF_MEM | BPF_H:
1773*4882a593Smuzhiyun 		case BPF_STX | BPF_MEM | BPF_W:
1774*4882a593Smuzhiyun 		case BPF_STX | BPF_MEM | BPF_DW:
1775*4882a593Smuzhiyun 			if (dstk)
1776*4882a593Smuzhiyun 				/* mov eax,dword ptr [ebp+off] */
1777*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1778*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
1779*4882a593Smuzhiyun 			else
1780*4882a593Smuzhiyun 				/* mov eax,dst_lo */
1781*4882a593Smuzhiyun 				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));
1782*4882a593Smuzhiyun 
1783*4882a593Smuzhiyun 			if (sstk)
1784*4882a593Smuzhiyun 				/* mov edx,dword ptr [ebp+off] */
1785*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1786*4882a593Smuzhiyun 				      STACK_VAR(src_lo));
1787*4882a593Smuzhiyun 			else
1788*4882a593Smuzhiyun 				/* mov edx,src_lo */
1789*4882a593Smuzhiyun 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));
1790*4882a593Smuzhiyun 
1791*4882a593Smuzhiyun 			switch (BPF_SIZE(code)) {
1792*4882a593Smuzhiyun 			case BPF_B:
1793*4882a593Smuzhiyun 				EMIT(0x88, 1); break;
1794*4882a593Smuzhiyun 			case BPF_H:
1795*4882a593Smuzhiyun 				EMIT2(0x66, 0x89); break;
1796*4882a593Smuzhiyun 			case BPF_W:
1797*4882a593Smuzhiyun 			case BPF_DW:
1798*4882a593Smuzhiyun 				EMIT(0x89, 1); break;
1799*4882a593Smuzhiyun 			}
1800*4882a593Smuzhiyun 
1801*4882a593Smuzhiyun 			if (is_imm8(insn->off))
1802*4882a593Smuzhiyun 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1803*4882a593Smuzhiyun 				      insn->off);
1804*4882a593Smuzhiyun 			else
1805*4882a593Smuzhiyun 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1806*4882a593Smuzhiyun 					    insn->off);
1807*4882a593Smuzhiyun 
1808*4882a593Smuzhiyun 			if (BPF_SIZE(code) == BPF_DW) {
1809*4882a593Smuzhiyun 				if (sstk)
1810*4882a593Smuzhiyun 					/* mov edi,dword ptr [ebp+off] */
1811*4882a593Smuzhiyun 					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
1812*4882a593Smuzhiyun 							     IA32_EDX),
1813*4882a593Smuzhiyun 					      STACK_VAR(src_hi));
1814*4882a593Smuzhiyun 				else
1815*4882a593Smuzhiyun 					/* mov edi,src_hi */
1816*4882a593Smuzhiyun 					EMIT2(0x8B, add_2reg(0xC0, src_hi,
1817*4882a593Smuzhiyun 							     IA32_EDX));
1818*4882a593Smuzhiyun 				EMIT1(0x89);
1819*4882a593Smuzhiyun 				if (is_imm8(insn->off + 4)) {
1820*4882a593Smuzhiyun 					EMIT2(add_2reg(0x40, IA32_EAX,
1821*4882a593Smuzhiyun 						       IA32_EDX),
1822*4882a593Smuzhiyun 					      insn->off + 4);
1823*4882a593Smuzhiyun 				} else {
1824*4882a593Smuzhiyun 					EMIT1(add_2reg(0x80, IA32_EAX,
1825*4882a593Smuzhiyun 						       IA32_EDX));
1826*4882a593Smuzhiyun 					EMIT(insn->off + 4, 4);
1827*4882a593Smuzhiyun 				}
1828*4882a593Smuzhiyun 			}
1829*4882a593Smuzhiyun 			break;
1830*4882a593Smuzhiyun 
1831*4882a593Smuzhiyun 		/* LDX: dst_reg = *(u8*)(src_reg + off) */
1832*4882a593Smuzhiyun 		case BPF_LDX | BPF_MEM | BPF_B:
1833*4882a593Smuzhiyun 		case BPF_LDX | BPF_MEM | BPF_H:
1834*4882a593Smuzhiyun 		case BPF_LDX | BPF_MEM | BPF_W:
1835*4882a593Smuzhiyun 		case BPF_LDX | BPF_MEM | BPF_DW:
1836*4882a593Smuzhiyun 			if (sstk)
1837*4882a593Smuzhiyun 				/* mov eax,dword ptr [ebp+off] */
1838*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1839*4882a593Smuzhiyun 				      STACK_VAR(src_lo));
1840*4882a593Smuzhiyun 			else
1841*4882a593Smuzhiyun 				/* mov eax,dword ptr [ebp+off] */
1842*4882a593Smuzhiyun 				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));
1843*4882a593Smuzhiyun 
1844*4882a593Smuzhiyun 			switch (BPF_SIZE(code)) {
1845*4882a593Smuzhiyun 			case BPF_B:
1846*4882a593Smuzhiyun 				EMIT2(0x0F, 0xB6); break;
1847*4882a593Smuzhiyun 			case BPF_H:
1848*4882a593Smuzhiyun 				EMIT2(0x0F, 0xB7); break;
1849*4882a593Smuzhiyun 			case BPF_W:
1850*4882a593Smuzhiyun 			case BPF_DW:
1851*4882a593Smuzhiyun 				EMIT(0x8B, 1); break;
1852*4882a593Smuzhiyun 			}
1853*4882a593Smuzhiyun 
1854*4882a593Smuzhiyun 			if (is_imm8(insn->off))
1855*4882a593Smuzhiyun 				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
1856*4882a593Smuzhiyun 				      insn->off);
1857*4882a593Smuzhiyun 			else
1858*4882a593Smuzhiyun 				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
1859*4882a593Smuzhiyun 					    insn->off);
1860*4882a593Smuzhiyun 
1861*4882a593Smuzhiyun 			if (dstk)
1862*4882a593Smuzhiyun 				/* mov dword ptr [ebp+off],edx */
1863*4882a593Smuzhiyun 				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1864*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
1865*4882a593Smuzhiyun 			else
1866*4882a593Smuzhiyun 				/* mov dst_lo,edx */
1867*4882a593Smuzhiyun 				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
1868*4882a593Smuzhiyun 			switch (BPF_SIZE(code)) {
1869*4882a593Smuzhiyun 			case BPF_B:
1870*4882a593Smuzhiyun 			case BPF_H:
1871*4882a593Smuzhiyun 			case BPF_W:
1872*4882a593Smuzhiyun 				if (bpf_prog->aux->verifier_zext)
1873*4882a593Smuzhiyun 					break;
1874*4882a593Smuzhiyun 				if (dstk) {
1875*4882a593Smuzhiyun 					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
1876*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
1877*4882a593Smuzhiyun 					EMIT(0x0, 4);
1878*4882a593Smuzhiyun 				} else {
1879*4882a593Smuzhiyun 					/* xor dst_hi,dst_hi */
1880*4882a593Smuzhiyun 					EMIT2(0x33,
1881*4882a593Smuzhiyun 					      add_2reg(0xC0, dst_hi, dst_hi));
1882*4882a593Smuzhiyun 				}
1883*4882a593Smuzhiyun 				break;
1884*4882a593Smuzhiyun 			case BPF_DW:
1885*4882a593Smuzhiyun 				EMIT2_off32(0x8B,
1886*4882a593Smuzhiyun 					    add_2reg(0x80, IA32_EAX, IA32_EDX),
1887*4882a593Smuzhiyun 					    insn->off + 4);
1888*4882a593Smuzhiyun 				if (dstk)
1889*4882a593Smuzhiyun 					EMIT3(0x89,
1890*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
1891*4882a593Smuzhiyun 						       IA32_EDX),
1892*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
1893*4882a593Smuzhiyun 				else
1894*4882a593Smuzhiyun 					EMIT2(0x89,
1895*4882a593Smuzhiyun 					      add_2reg(0xC0, dst_hi, IA32_EDX));
1896*4882a593Smuzhiyun 				break;
1897*4882a593Smuzhiyun 			default:
1898*4882a593Smuzhiyun 				break;
1899*4882a593Smuzhiyun 			}
1900*4882a593Smuzhiyun 			break;
1901*4882a593Smuzhiyun 		/* call */
1902*4882a593Smuzhiyun 		case BPF_JMP | BPF_CALL:
1903*4882a593Smuzhiyun 		{
1904*4882a593Smuzhiyun 			const u8 *r1 = bpf2ia32[BPF_REG_1];
1905*4882a593Smuzhiyun 			const u8 *r2 = bpf2ia32[BPF_REG_2];
1906*4882a593Smuzhiyun 			const u8 *r3 = bpf2ia32[BPF_REG_3];
1907*4882a593Smuzhiyun 			const u8 *r4 = bpf2ia32[BPF_REG_4];
1908*4882a593Smuzhiyun 			const u8 *r5 = bpf2ia32[BPF_REG_5];
1909*4882a593Smuzhiyun 
1910*4882a593Smuzhiyun 			if (insn->src_reg == BPF_PSEUDO_CALL)
1911*4882a593Smuzhiyun 				goto notyet;
1912*4882a593Smuzhiyun 
1913*4882a593Smuzhiyun 			func = (u8 *) __bpf_call_base + imm32;
1914*4882a593Smuzhiyun 			jmp_offset = func - (image + addrs[i]);
1915*4882a593Smuzhiyun 
1916*4882a593Smuzhiyun 			if (!imm32 || !is_simm32(jmp_offset)) {
1917*4882a593Smuzhiyun 				pr_err("unsupported BPF func %d addr %p image %p\n",
1918*4882a593Smuzhiyun 				       imm32, func, image);
1919*4882a593Smuzhiyun 				return -EINVAL;
1920*4882a593Smuzhiyun 			}
1921*4882a593Smuzhiyun 
1922*4882a593Smuzhiyun 			/* mov eax,dword ptr [ebp+off] */
1923*4882a593Smuzhiyun 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1924*4882a593Smuzhiyun 			      STACK_VAR(r1[0]));
1925*4882a593Smuzhiyun 			/* mov edx,dword ptr [ebp+off] */
1926*4882a593Smuzhiyun 			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
1927*4882a593Smuzhiyun 			      STACK_VAR(r1[1]));
1928*4882a593Smuzhiyun 
1929*4882a593Smuzhiyun 			emit_push_r64(r5, &prog);
1930*4882a593Smuzhiyun 			emit_push_r64(r4, &prog);
1931*4882a593Smuzhiyun 			emit_push_r64(r3, &prog);
1932*4882a593Smuzhiyun 			emit_push_r64(r2, &prog);
1933*4882a593Smuzhiyun 
1934*4882a593Smuzhiyun 			EMIT1_off32(0xE8, jmp_offset + 9);
1935*4882a593Smuzhiyun 
1936*4882a593Smuzhiyun 			/* mov dword ptr [ebp+off],eax */
1937*4882a593Smuzhiyun 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
1938*4882a593Smuzhiyun 			      STACK_VAR(r0[0]));
1939*4882a593Smuzhiyun 			/* mov dword ptr [ebp+off],edx */
1940*4882a593Smuzhiyun 			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
1941*4882a593Smuzhiyun 			      STACK_VAR(r0[1]));
1942*4882a593Smuzhiyun 
1943*4882a593Smuzhiyun 			/* add esp,32 */
1944*4882a593Smuzhiyun 			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
1945*4882a593Smuzhiyun 			break;
1946*4882a593Smuzhiyun 		}
1947*4882a593Smuzhiyun 		case BPF_JMP | BPF_TAIL_CALL:
1948*4882a593Smuzhiyun 			emit_bpf_tail_call(&prog, image + addrs[i - 1]);
1949*4882a593Smuzhiyun 			break;
1950*4882a593Smuzhiyun 
1951*4882a593Smuzhiyun 		/* cond jump */
1952*4882a593Smuzhiyun 		case BPF_JMP | BPF_JEQ | BPF_X:
1953*4882a593Smuzhiyun 		case BPF_JMP | BPF_JNE | BPF_X:
1954*4882a593Smuzhiyun 		case BPF_JMP | BPF_JGT | BPF_X:
1955*4882a593Smuzhiyun 		case BPF_JMP | BPF_JLT | BPF_X:
1956*4882a593Smuzhiyun 		case BPF_JMP | BPF_JGE | BPF_X:
1957*4882a593Smuzhiyun 		case BPF_JMP | BPF_JLE | BPF_X:
1958*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JEQ | BPF_X:
1959*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JNE | BPF_X:
1960*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JGT | BPF_X:
1961*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JLT | BPF_X:
1962*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JGE | BPF_X:
1963*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JLE | BPF_X:
1964*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSGT | BPF_X:
1965*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSLE | BPF_X:
1966*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSLT | BPF_X:
1967*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSGE | BPF_X: {
1968*4882a593Smuzhiyun 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
1969*4882a593Smuzhiyun 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1970*4882a593Smuzhiyun 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1971*4882a593Smuzhiyun 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
1972*4882a593Smuzhiyun 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
1973*4882a593Smuzhiyun 
1974*4882a593Smuzhiyun 			if (dstk) {
1975*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1976*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
1977*4882a593Smuzhiyun 				if (is_jmp64)
1978*4882a593Smuzhiyun 					EMIT3(0x8B,
1979*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
1980*4882a593Smuzhiyun 						       IA32_EDX),
1981*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
1982*4882a593Smuzhiyun 			}
1983*4882a593Smuzhiyun 
1984*4882a593Smuzhiyun 			if (sstk) {
1985*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
1986*4882a593Smuzhiyun 				      STACK_VAR(src_lo));
1987*4882a593Smuzhiyun 				if (is_jmp64)
1988*4882a593Smuzhiyun 					EMIT3(0x8B,
1989*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
1990*4882a593Smuzhiyun 						       IA32_EBX),
1991*4882a593Smuzhiyun 					      STACK_VAR(src_hi));
1992*4882a593Smuzhiyun 			}
1993*4882a593Smuzhiyun 
1994*4882a593Smuzhiyun 			if (is_jmp64) {
1995*4882a593Smuzhiyun 				/* cmp dreg_hi,sreg_hi */
1996*4882a593Smuzhiyun 				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
1997*4882a593Smuzhiyun 				EMIT2(IA32_JNE, 2);
1998*4882a593Smuzhiyun 			}
1999*4882a593Smuzhiyun 			/* cmp dreg_lo,sreg_lo */
2000*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2001*4882a593Smuzhiyun 			goto emit_cond_jmp;
2002*4882a593Smuzhiyun 		}
2003*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSGT | BPF_X:
2004*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSLE | BPF_X:
2005*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSLT | BPF_X:
2006*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSGE | BPF_X: {
2007*4882a593Smuzhiyun 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2008*4882a593Smuzhiyun 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2009*4882a593Smuzhiyun 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2010*4882a593Smuzhiyun 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2011*4882a593Smuzhiyun 
2012*4882a593Smuzhiyun 			if (dstk) {
2013*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2014*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
2015*4882a593Smuzhiyun 				EMIT3(0x8B,
2016*4882a593Smuzhiyun 				      add_2reg(0x40, IA32_EBP,
2017*4882a593Smuzhiyun 					       IA32_EDX),
2018*4882a593Smuzhiyun 				      STACK_VAR(dst_hi));
2019*4882a593Smuzhiyun 			}
2020*4882a593Smuzhiyun 
2021*4882a593Smuzhiyun 			if (sstk) {
2022*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2023*4882a593Smuzhiyun 				      STACK_VAR(src_lo));
2024*4882a593Smuzhiyun 				EMIT3(0x8B,
2025*4882a593Smuzhiyun 				      add_2reg(0x40, IA32_EBP,
2026*4882a593Smuzhiyun 					       IA32_EBX),
2027*4882a593Smuzhiyun 				      STACK_VAR(src_hi));
2028*4882a593Smuzhiyun 			}
2029*4882a593Smuzhiyun 
2030*4882a593Smuzhiyun 			/* cmp dreg_hi,sreg_hi */
2031*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2032*4882a593Smuzhiyun 			EMIT2(IA32_JNE, 10);
2033*4882a593Smuzhiyun 			/* cmp dreg_lo,sreg_lo */
2034*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2035*4882a593Smuzhiyun 			goto emit_cond_jmp_signed;
2036*4882a593Smuzhiyun 		}
2037*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSET | BPF_X:
2038*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSET | BPF_X: {
2039*4882a593Smuzhiyun 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2040*4882a593Smuzhiyun 			u8 dreg_lo = IA32_EAX;
2041*4882a593Smuzhiyun 			u8 dreg_hi = IA32_EDX;
2042*4882a593Smuzhiyun 			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
2043*4882a593Smuzhiyun 			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
2044*4882a593Smuzhiyun 
2045*4882a593Smuzhiyun 			if (dstk) {
2046*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2047*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
2048*4882a593Smuzhiyun 				if (is_jmp64)
2049*4882a593Smuzhiyun 					EMIT3(0x8B,
2050*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
2051*4882a593Smuzhiyun 						       IA32_EDX),
2052*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
2053*4882a593Smuzhiyun 			} else {
2054*4882a593Smuzhiyun 				/* mov dreg_lo,dst_lo */
2055*4882a593Smuzhiyun 				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2056*4882a593Smuzhiyun 				if (is_jmp64)
2057*4882a593Smuzhiyun 					/* mov dreg_hi,dst_hi */
2058*4882a593Smuzhiyun 					EMIT2(0x89,
2059*4882a593Smuzhiyun 					      add_2reg(0xC0, dreg_hi, dst_hi));
2060*4882a593Smuzhiyun 			}
2061*4882a593Smuzhiyun 
2062*4882a593Smuzhiyun 			if (sstk) {
2063*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
2064*4882a593Smuzhiyun 				      STACK_VAR(src_lo));
2065*4882a593Smuzhiyun 				if (is_jmp64)
2066*4882a593Smuzhiyun 					EMIT3(0x8B,
2067*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
2068*4882a593Smuzhiyun 						       IA32_EBX),
2069*4882a593Smuzhiyun 					      STACK_VAR(src_hi));
2070*4882a593Smuzhiyun 			}
2071*4882a593Smuzhiyun 			/* and dreg_lo,sreg_lo */
2072*4882a593Smuzhiyun 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2073*4882a593Smuzhiyun 			if (is_jmp64) {
2074*4882a593Smuzhiyun 				/* and dreg_hi,sreg_hi */
2075*4882a593Smuzhiyun 				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2076*4882a593Smuzhiyun 				/* or dreg_lo,dreg_hi */
2077*4882a593Smuzhiyun 				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2078*4882a593Smuzhiyun 			}
2079*4882a593Smuzhiyun 			goto emit_cond_jmp;
2080*4882a593Smuzhiyun 		}
2081*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSET | BPF_K:
2082*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSET | BPF_K: {
2083*4882a593Smuzhiyun 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2084*4882a593Smuzhiyun 			u8 dreg_lo = IA32_EAX;
2085*4882a593Smuzhiyun 			u8 dreg_hi = IA32_EDX;
2086*4882a593Smuzhiyun 			u8 sreg_lo = IA32_ECX;
2087*4882a593Smuzhiyun 			u8 sreg_hi = IA32_EBX;
2088*4882a593Smuzhiyun 			u32 hi;
2089*4882a593Smuzhiyun 
2090*4882a593Smuzhiyun 			if (dstk) {
2091*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2092*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
2093*4882a593Smuzhiyun 				if (is_jmp64)
2094*4882a593Smuzhiyun 					EMIT3(0x8B,
2095*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
2096*4882a593Smuzhiyun 						       IA32_EDX),
2097*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
2098*4882a593Smuzhiyun 			} else {
2099*4882a593Smuzhiyun 				/* mov dreg_lo,dst_lo */
2100*4882a593Smuzhiyun 				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
2101*4882a593Smuzhiyun 				if (is_jmp64)
2102*4882a593Smuzhiyun 					/* mov dreg_hi,dst_hi */
2103*4882a593Smuzhiyun 					EMIT2(0x89,
2104*4882a593Smuzhiyun 					      add_2reg(0xC0, dreg_hi, dst_hi));
2105*4882a593Smuzhiyun 			}
2106*4882a593Smuzhiyun 
2107*4882a593Smuzhiyun 			/* mov ecx,imm32 */
2108*4882a593Smuzhiyun 			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);
2109*4882a593Smuzhiyun 
2110*4882a593Smuzhiyun 			/* and dreg_lo,sreg_lo */
2111*4882a593Smuzhiyun 			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
2112*4882a593Smuzhiyun 			if (is_jmp64) {
2113*4882a593Smuzhiyun 				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2114*4882a593Smuzhiyun 				/* mov ebx,imm32 */
2115*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
2116*4882a593Smuzhiyun 				/* and dreg_hi,sreg_hi */
2117*4882a593Smuzhiyun 				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
2118*4882a593Smuzhiyun 				/* or dreg_lo,dreg_hi */
2119*4882a593Smuzhiyun 				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
2120*4882a593Smuzhiyun 			}
2121*4882a593Smuzhiyun 			goto emit_cond_jmp;
2122*4882a593Smuzhiyun 		}
2123*4882a593Smuzhiyun 		case BPF_JMP | BPF_JEQ | BPF_K:
2124*4882a593Smuzhiyun 		case BPF_JMP | BPF_JNE | BPF_K:
2125*4882a593Smuzhiyun 		case BPF_JMP | BPF_JGT | BPF_K:
2126*4882a593Smuzhiyun 		case BPF_JMP | BPF_JLT | BPF_K:
2127*4882a593Smuzhiyun 		case BPF_JMP | BPF_JGE | BPF_K:
2128*4882a593Smuzhiyun 		case BPF_JMP | BPF_JLE | BPF_K:
2129*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JEQ | BPF_K:
2130*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JNE | BPF_K:
2131*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JGT | BPF_K:
2132*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JLT | BPF_K:
2133*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JGE | BPF_K:
2134*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JLE | BPF_K:
2135*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSGT | BPF_K:
2136*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSLE | BPF_K:
2137*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSLT | BPF_K:
2138*4882a593Smuzhiyun 		case BPF_JMP32 | BPF_JSGE | BPF_K: {
2139*4882a593Smuzhiyun 			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
2140*4882a593Smuzhiyun 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2141*4882a593Smuzhiyun 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2142*4882a593Smuzhiyun 			u8 sreg_lo = IA32_ECX;
2143*4882a593Smuzhiyun 			u8 sreg_hi = IA32_EBX;
2144*4882a593Smuzhiyun 			u32 hi;
2145*4882a593Smuzhiyun 
2146*4882a593Smuzhiyun 			if (dstk) {
2147*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2148*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
2149*4882a593Smuzhiyun 				if (is_jmp64)
2150*4882a593Smuzhiyun 					EMIT3(0x8B,
2151*4882a593Smuzhiyun 					      add_2reg(0x40, IA32_EBP,
2152*4882a593Smuzhiyun 						       IA32_EDX),
2153*4882a593Smuzhiyun 					      STACK_VAR(dst_hi));
2154*4882a593Smuzhiyun 			}
2155*4882a593Smuzhiyun 
2156*4882a593Smuzhiyun 			/* mov ecx,imm32 */
2157*4882a593Smuzhiyun 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2158*4882a593Smuzhiyun 			if (is_jmp64) {
2159*4882a593Smuzhiyun 				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2160*4882a593Smuzhiyun 				/* mov ebx,imm32 */
2161*4882a593Smuzhiyun 				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2162*4882a593Smuzhiyun 				/* cmp dreg_hi,sreg_hi */
2163*4882a593Smuzhiyun 				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2164*4882a593Smuzhiyun 				EMIT2(IA32_JNE, 2);
2165*4882a593Smuzhiyun 			}
2166*4882a593Smuzhiyun 			/* cmp dreg_lo,sreg_lo */
2167*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2168*4882a593Smuzhiyun 
2169*4882a593Smuzhiyun emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2170*4882a593Smuzhiyun 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2171*4882a593Smuzhiyun 				return -EFAULT;
2172*4882a593Smuzhiyun 			jmp_offset = addrs[i + insn->off] - addrs[i];
2173*4882a593Smuzhiyun 			if (is_imm8(jmp_offset)) {
2174*4882a593Smuzhiyun 				EMIT2(jmp_cond, jmp_offset);
2175*4882a593Smuzhiyun 			} else if (is_simm32(jmp_offset)) {
2176*4882a593Smuzhiyun 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2177*4882a593Smuzhiyun 			} else {
2178*4882a593Smuzhiyun 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2179*4882a593Smuzhiyun 				return -EFAULT;
2180*4882a593Smuzhiyun 			}
2181*4882a593Smuzhiyun 			break;
2182*4882a593Smuzhiyun 		}
2183*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSGT | BPF_K:
2184*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSLE | BPF_K:
2185*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSLT | BPF_K:
2186*4882a593Smuzhiyun 		case BPF_JMP | BPF_JSGE | BPF_K: {
2187*4882a593Smuzhiyun 			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
2188*4882a593Smuzhiyun 			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
2189*4882a593Smuzhiyun 			u8 sreg_lo = IA32_ECX;
2190*4882a593Smuzhiyun 			u8 sreg_hi = IA32_EBX;
2191*4882a593Smuzhiyun 			u32 hi;
2192*4882a593Smuzhiyun 
2193*4882a593Smuzhiyun 			if (dstk) {
2194*4882a593Smuzhiyun 				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
2195*4882a593Smuzhiyun 				      STACK_VAR(dst_lo));
2196*4882a593Smuzhiyun 				EMIT3(0x8B,
2197*4882a593Smuzhiyun 				      add_2reg(0x40, IA32_EBP,
2198*4882a593Smuzhiyun 					       IA32_EDX),
2199*4882a593Smuzhiyun 				      STACK_VAR(dst_hi));
2200*4882a593Smuzhiyun 			}
2201*4882a593Smuzhiyun 
2202*4882a593Smuzhiyun 			/* mov ecx,imm32 */
2203*4882a593Smuzhiyun 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
2204*4882a593Smuzhiyun 			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
2205*4882a593Smuzhiyun 			/* mov ebx,imm32 */
2206*4882a593Smuzhiyun 			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
2207*4882a593Smuzhiyun 			/* cmp dreg_hi,sreg_hi */
2208*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
2209*4882a593Smuzhiyun 			EMIT2(IA32_JNE, 10);
2210*4882a593Smuzhiyun 			/* cmp dreg_lo,sreg_lo */
2211*4882a593Smuzhiyun 			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
2212*4882a593Smuzhiyun 
2213*4882a593Smuzhiyun 			/*
2214*4882a593Smuzhiyun 			 * For simplicity of branch offset computation,
2215*4882a593Smuzhiyun 			 * let's use fixed jump coding here.
2216*4882a593Smuzhiyun 			 */
2217*4882a593Smuzhiyun emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
2218*4882a593Smuzhiyun 			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
2219*4882a593Smuzhiyun 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2220*4882a593Smuzhiyun 				return -EFAULT;
2221*4882a593Smuzhiyun 			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
2222*4882a593Smuzhiyun 			if (is_simm32(jmp_offset)) {
2223*4882a593Smuzhiyun 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2224*4882a593Smuzhiyun 			} else {
2225*4882a593Smuzhiyun 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2226*4882a593Smuzhiyun 				return -EFAULT;
2227*4882a593Smuzhiyun 			}
2228*4882a593Smuzhiyun 			EMIT2(0xEB, 6);
2229*4882a593Smuzhiyun 
2230*4882a593Smuzhiyun 			/* Check the condition for high 32-bit comparison */
2231*4882a593Smuzhiyun 			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
2232*4882a593Smuzhiyun 			if (jmp_cond == COND_JMP_OPCODE_INVALID)
2233*4882a593Smuzhiyun 				return -EFAULT;
2234*4882a593Smuzhiyun 			jmp_offset = addrs[i + insn->off] - addrs[i];
2235*4882a593Smuzhiyun 			if (is_simm32(jmp_offset)) {
2236*4882a593Smuzhiyun 				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
2237*4882a593Smuzhiyun 			} else {
2238*4882a593Smuzhiyun 				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
2239*4882a593Smuzhiyun 				return -EFAULT;
2240*4882a593Smuzhiyun 			}
2241*4882a593Smuzhiyun 			break;
2242*4882a593Smuzhiyun 		}
2243*4882a593Smuzhiyun 		case BPF_JMP | BPF_JA:
2244*4882a593Smuzhiyun 			if (insn->off == -1)
2245*4882a593Smuzhiyun 				/* -1 jmp instructions will always jump
2246*4882a593Smuzhiyun 				 * backwards two bytes. Explicitly handling
2247*4882a593Smuzhiyun 				 * this case avoids wasting too many passes
2248*4882a593Smuzhiyun 				 * when there are long sequences of replaced
2249*4882a593Smuzhiyun 				 * dead code.
2250*4882a593Smuzhiyun 				 */
2251*4882a593Smuzhiyun 				jmp_offset = -2;
2252*4882a593Smuzhiyun 			else
2253*4882a593Smuzhiyun 				jmp_offset = addrs[i + insn->off] - addrs[i];
2254*4882a593Smuzhiyun 
2255*4882a593Smuzhiyun 			if (!jmp_offset)
2256*4882a593Smuzhiyun 				/* Optimize out nop jumps */
2257*4882a593Smuzhiyun 				break;
2258*4882a593Smuzhiyun emit_jmp:
2259*4882a593Smuzhiyun 			if (is_imm8(jmp_offset)) {
2260*4882a593Smuzhiyun 				EMIT2(0xEB, jmp_offset);
2261*4882a593Smuzhiyun 			} else if (is_simm32(jmp_offset)) {
2262*4882a593Smuzhiyun 				EMIT1_off32(0xE9, jmp_offset);
2263*4882a593Smuzhiyun 			} else {
2264*4882a593Smuzhiyun 				pr_err("jmp gen bug %llx\n", jmp_offset);
2265*4882a593Smuzhiyun 				return -EFAULT;
2266*4882a593Smuzhiyun 			}
2267*4882a593Smuzhiyun 			break;
2268*4882a593Smuzhiyun 		/* STX XADD: lock *(u32 *)(dst + off) += src */
2269*4882a593Smuzhiyun 		case BPF_STX | BPF_XADD | BPF_W:
2270*4882a593Smuzhiyun 		/* STX XADD: lock *(u64 *)(dst + off) += src */
2271*4882a593Smuzhiyun 		case BPF_STX | BPF_XADD | BPF_DW:
2272*4882a593Smuzhiyun 			goto notyet;
2273*4882a593Smuzhiyun 		case BPF_JMP | BPF_EXIT:
2274*4882a593Smuzhiyun 			if (seen_exit) {
2275*4882a593Smuzhiyun 				jmp_offset = ctx->cleanup_addr - addrs[i];
2276*4882a593Smuzhiyun 				goto emit_jmp;
2277*4882a593Smuzhiyun 			}
2278*4882a593Smuzhiyun 			seen_exit = true;
2279*4882a593Smuzhiyun 			/* Update cleanup_addr */
2280*4882a593Smuzhiyun 			ctx->cleanup_addr = proglen;
2281*4882a593Smuzhiyun 			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
2282*4882a593Smuzhiyun 			break;
2283*4882a593Smuzhiyun notyet:
2284*4882a593Smuzhiyun 			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
2285*4882a593Smuzhiyun 			return -EFAULT;
2286*4882a593Smuzhiyun 		default:
2287*4882a593Smuzhiyun 			/*
2288*4882a593Smuzhiyun 			 * This error will be seen if new instruction was added
2289*4882a593Smuzhiyun 			 * to interpreter, but not to JIT or if there is junk in
2290*4882a593Smuzhiyun 			 * bpf_prog
2291*4882a593Smuzhiyun 			 */
2292*4882a593Smuzhiyun 			pr_err("bpf_jit: unknown opcode %02x\n", code);
2293*4882a593Smuzhiyun 			return -EINVAL;
2294*4882a593Smuzhiyun 		}
2295*4882a593Smuzhiyun 
2296*4882a593Smuzhiyun 		ilen = prog - temp;
2297*4882a593Smuzhiyun 		if (ilen > BPF_MAX_INSN_SIZE) {
2298*4882a593Smuzhiyun 			pr_err("bpf_jit: fatal insn size error\n");
2299*4882a593Smuzhiyun 			return -EFAULT;
2300*4882a593Smuzhiyun 		}
2301*4882a593Smuzhiyun 
2302*4882a593Smuzhiyun 		if (image) {
2303*4882a593Smuzhiyun 			/*
2304*4882a593Smuzhiyun 			 * When populating the image, assert that:
2305*4882a593Smuzhiyun 			 *
2306*4882a593Smuzhiyun 			 *  i) We do not write beyond the allocated space, and
2307*4882a593Smuzhiyun 			 * ii) addrs[i] did not change from the prior run, in order
2308*4882a593Smuzhiyun 			 *     to validate assumptions made for computing branch
2309*4882a593Smuzhiyun 			 *     displacements.
2310*4882a593Smuzhiyun 			 */
2311*4882a593Smuzhiyun 			if (unlikely(proglen + ilen > oldproglen ||
2312*4882a593Smuzhiyun 				     proglen + ilen != addrs[i])) {
2313*4882a593Smuzhiyun 				pr_err("bpf_jit: fatal error\n");
2314*4882a593Smuzhiyun 				return -EFAULT;
2315*4882a593Smuzhiyun 			}
2316*4882a593Smuzhiyun 			memcpy(image + proglen, temp, ilen);
2317*4882a593Smuzhiyun 		}
2318*4882a593Smuzhiyun 		proglen += ilen;
2319*4882a593Smuzhiyun 		addrs[i] = proglen;
2320*4882a593Smuzhiyun 		prog = temp;
2321*4882a593Smuzhiyun 	}
2322*4882a593Smuzhiyun 	return proglen;
2323*4882a593Smuzhiyun }
2324*4882a593Smuzhiyun 
/*
 * 64-bit BPF registers are modelled as lo/hi pairs of 32-bit registers in
 * this JIT (see bpf2ia32 and the dst_lo/dst_hi handling in do_jit()), so
 * zero-extension of 32-bit subregister writes is not free here.  Returning
 * true asks the BPF verifier to insert explicit zero-extension instructions
 * where they are needed.
 */
bool bpf_jit_needs_zext(void)
{
	return true;
}
2329*4882a593Smuzhiyun 
/*
 * Main JIT entry point: translate @prog into IA32 machine code.
 *
 * do_jit() is run repeatedly over the program until the generated image
 * size stops shrinking (branch offsets tighten between passes).  Once the
 * size has converged, the executable image is allocated, a final pass
 * populates it, and it is made read-only.  On any failure the original
 * program is returned unchanged so the caller falls back to the
 * interpreter.
 *
 * Returns the JITed program on success, or @prog (possibly the original,
 * un-blinded one) on failure.
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	/* addrs[i]: offset of the end of BPF insn i in the JITed image. */
	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out;
	}

	/*
	 * Before first pass, make a rough estimation of addrs[]
	 * each BPF instruction is translated to less than 64 bytes
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;

	/*
	 * JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs
	 * may converge on the last pass. In such case do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
out_image:
			/* Generation failed: free any image and fall back. */
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			/*
			 * Final (populating) pass: the emitted size must
			 * match the size the image was allocated for.
			 */
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				goto out_image;
			}
			break;
		}
		if (proglen == oldproglen) {
			/*
			 * Size has converged: allocate the image so the
			 * next iteration writes the final code into it.
			 */
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
		cond_resched();
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);

	if (image) {
		/* Seal the image and switch the prog over to the JITed code. */
		bpf_jit_binary_lock_ro(header);
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

out_addrs:
	kfree(addrs);
out:
	/* Release whichever of (blinded, original) prog we are not returning. */
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}
2429