// SPDX-License-Identifier: GPL-2.0
/* BPF JIT compiler for RV64G
 *
 * Copyright(c) 2019 Björn Töpel <bjorn.topel@gmail.com>
 *
 */

#include <linux/bpf.h>
#include <linux/filter.h>
#include "bpf_jit.h"

#define RV_REG_TCC RV_REG_A6
#define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program makes calls */
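/* The tail-call counter (TCC) lives in a6, a caller-saved argument
 * register. If the program also performs calls, the prologue copies it
 * to the callee-saved s6 (RV_REG_TCC_SAVED) so the count survives those
 * calls; see rv_tail_call_reg() below.
 */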

static const int regmap[] = {
	[BPF_REG_0] =	RV_REG_A5,
	[BPF_REG_1] =	RV_REG_A0,
	[BPF_REG_2] =	RV_REG_A1,
	[BPF_REG_3] =	RV_REG_A2,
	[BPF_REG_4] =	RV_REG_A3,
	[BPF_REG_5] =	RV_REG_A4,
	[BPF_REG_6] =	RV_REG_S1,
	[BPF_REG_7] =	RV_REG_S2,
	[BPF_REG_8] =	RV_REG_S3,
	[BPF_REG_9] =	RV_REG_S4,
	[BPF_REG_FP] =	RV_REG_S5,
	[BPF_REG_AX] =	RV_REG_T0,
};

enum {
	RV_CTX_F_SEEN_TAIL_CALL =	0,
	RV_CTX_F_SEEN_CALL =		RV_REG_RA,
	RV_CTX_F_SEEN_S1 =		RV_REG_S1,
	RV_CTX_F_SEEN_S2 =		RV_REG_S2,
	RV_CTX_F_SEEN_S3 =		RV_REG_S3,
	RV_CTX_F_SEEN_S4 =		RV_REG_S4,
	RV_CTX_F_SEEN_S5 =		RV_REG_S5,
	RV_CTX_F_SEEN_S6 =		RV_REG_S6,
};
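/* Apart from the tail-call flag (bit 0, the never-tracked zero register),
 * the RV_CTX_F_SEEN_* values are chosen to equal the corresponding RISC-V
 * register numbers (ra, s1-s6), so the same bit index in ctx->flags
 * serves both as a "register was used" marker (bpf_to_rv_reg(),
 * seen_reg()) and as a generic context flag.
 */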

static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx)
{
	u8 reg = regmap[bpf_reg];

	switch (reg) {
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		__set_bit(reg, &ctx->flags);
	}
	return reg;
}

static bool seen_reg(int reg, struct rv_jit_context *ctx)
{
	switch (reg) {
	case RV_CTX_F_SEEN_CALL:
	case RV_CTX_F_SEEN_S1:
	case RV_CTX_F_SEEN_S2:
	case RV_CTX_F_SEEN_S3:
	case RV_CTX_F_SEEN_S4:
	case RV_CTX_F_SEEN_S5:
	case RV_CTX_F_SEEN_S6:
		return test_bit(reg, &ctx->flags);
	}
	return false;
}

static void mark_fp(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
}

static void mark_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

static bool seen_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
}

static void mark_tail_call(struct rv_jit_context *ctx)
{
	__set_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

static bool seen_tail_call(struct rv_jit_context *ctx)
{
	return test_bit(RV_CTX_F_SEEN_TAIL_CALL, &ctx->flags);
}

static u8 rv_tail_call_reg(struct rv_jit_context *ctx)
{
	mark_tail_call(ctx);

	if (seen_call(ctx)) {
		__set_bit(RV_CTX_F_SEEN_S6, &ctx->flags);
		return RV_REG_S6;
	}
	return RV_REG_A6;
}

static bool is_32b_int(s64 val)
{
	return -(1L << 31) <= val && val < (1L << 31);
}

static bool in_auipc_jalr_range(s64 val)
{
	/*
	 * auipc+jalr can reach any signed PC-relative offset in the range
	 * [-2^31 - 2^11, 2^31 - 2^11).
	 */
	return (-(1L << 31) - (1L << 11)) <= val &&
		val < ((1L << 31) - (1L << 11));
}

static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
{
	/* Note that the immediate from the add is sign-extended,
	 * which means that we need to compensate this by adding 2^12,
	 * when the 12th bit is set. A simpler way of doing this, and
	 * getting rid of the check, is to just add 2^11 before the
	 * shift. The "Loading a 32-Bit constant" example from the
	 * "Computer Organization and Design, RISC-V edition" book by
	 * Patterson/Hennessy highlights this fact.
	 *
	 * This also means that we need to process LSB to MSB.
	 */
	s64 upper = (val + (1 << 11)) >> 12;
	/* Sign-extend lower 12 bits to 64 bits since immediates for li, addiw,
	 * and addi are signed and RVC checks will perform signed comparisons.
	 */
	s64 lower = ((val & 0xfff) << 52) >> 52;
	int shift;

	if (is_32b_int(val)) {
		if (upper)
			emit_lui(rd, upper, ctx);

		if (!upper) {
			emit_li(rd, lower, ctx);
			return;
		}

		emit_addiw(rd, rd, lower, ctx);
		return;
	}

	shift = __ffs(upper);
	upper >>= shift;
	shift += 12;

	emit_imm(rd, upper, ctx);

	emit_slli(rd, rd, shift, ctx);
	if (lower)
		emit_addi(rd, rd, lower, ctx);
}
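/* Worked example for emit_imm() above (illustrative, not from the
 * original source): loading val = 0x12345fff. lower is the sign-extended
 * low 12 bits, sext12(0xfff) = -1, and upper = (0x12345fff + 0x800) >> 12
 * = 0x12346, so the 32-bit path emits "lui rd, 0x12346; addiw rd, rd, -1",
 * i.e. 0x12346000 - 1 = 0x12345fff. For a 64-bit constant, the recursion
 * materializes the upper bits first, then shifts them into place and
 * adds the final sign-extended 12-bit chunk.
 */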

static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
{
	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_ld(RV_REG_RA, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	emit_ld(RV_REG_FP, store_offset, RV_REG_SP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_ld(RV_REG_S1, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_ld(RV_REG_S2, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_ld(RV_REG_S3, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_ld(RV_REG_S4, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_ld(RV_REG_S5, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx);
		store_offset -= 8;
	}

	emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx);
	/* Set return value. */
	if (!is_tail_call)
		emit_mv(RV_REG_A0, RV_REG_A5, ctx);
	emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
		  is_tail_call ? 4 : 0, /* skip TCC init */
		  ctx);
}

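/* rvoff below is a byte offset from the current instruction. RISC-V
 * branch/jump immediates are encoded in 2-byte units (bit 0 of the
 * target is always zero), which is why every helper here is passed
 * rvoff >> 1.
 */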
static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
		     struct rv_jit_context *ctx)
{
	switch (cond) {
	case BPF_JEQ:
		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGT:
		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JLT:
		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JGE:
		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JLE:
		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JNE:
		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGT:
		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
		return;
	case BPF_JSLT:
		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSGE:
		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
		return;
	case BPF_JSLE:
		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
	}
}

static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
			struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (is_13b_int(rvoff)) {
		emit_bcc(cond, rd, rs, rvoff, ctx);
		return;
	}

	/* Adjust for jal */
	rvoff -= 4;

	/* Transform, e.g.:
	 *   bne rd,rs,foo
	 * to
	 *   beq rd,rs,<.L1>
	 *   (auipc foo)
	 *   jal(r) foo
	 * .L1
	 */
	cond = invert_bpf_cond(cond);
	if (is_21b_int(rvoff)) {
		emit_bcc(cond, rd, rs, 8, ctx);
		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
		return;
	}

	/* 32b. No need for an additional rvoff adjustment, since we
	 * get that from the auipc at PC', where PC = PC' + 4.
	 */
	upper = (rvoff + (1 << 11)) >> 12;
	lower = rvoff & 0xfff;

	emit_bcc(cond, rd, rs, 12, ctx);
	emit(rv_auipc(RV_REG_T1, upper), ctx);
	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
}

static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
{
	emit_slli(reg, reg, 32, ctx);
	emit_srli(reg, reg, 32, ctx);
}

static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
{
	int tc_ninsn, off, start_insn = ctx->ninsns;
	u8 tcc = rv_tail_call_reg(ctx);

	/* a0: &ctx
	 * a1: &array
	 * a2: index
	 *
	 * if (index >= array->map.max_entries)
	 *	goto out;
	 */
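	/* tc_ninsn is the number of JITed instructions budgeted for this
	 * BPF instruction (taken from the offset table), so a forward
	 * branch of tc_ninsn - (ctx->ninsns - start_insn) instructions
	 * always lands just past the whole tail-call sequence, i.e. "out".
	 */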
	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
		   ctx->offset[0];
	emit_zext_32(RV_REG_A2, ctx);

	off = offsetof(struct bpf_array, map.max_entries);
	if (is_12b_check(off, insn))
		return -1;
	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);

	/* if (TCC-- < 0)
	 *     goto out;
	 */
	emit_addi(RV_REG_T1, tcc, -1, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);

	/* prog = array->ptrs[index];
	 * if (!prog)
	 *     goto out;
	 */
	emit_slli(RV_REG_T2, RV_REG_A2, 3, ctx);
	emit_add(RV_REG_T2, RV_REG_T2, RV_REG_A1, ctx);
	off = offsetof(struct bpf_array, ptrs);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T2, off, RV_REG_T2, ctx);
	off = ninsns_rvoff(tc_ninsn - (ctx->ninsns - start_insn));
	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);

	/* goto *(prog->bpf_func + 4); */
	off = offsetof(struct bpf_prog, bpf_func);
	if (is_12b_check(off, insn))
		return -1;
	emit_ld(RV_REG_T3, off, RV_REG_T2, ctx);
	emit_mv(RV_REG_TCC, RV_REG_T1, ctx);
	__build_epilogue(true, ctx);
	return 0;
}

static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
		      struct rv_jit_context *ctx)
{
	u8 code = insn->code;

	switch (code) {
	case BPF_JMP | BPF_JA:
	case BPF_JMP | BPF_CALL:
	case BPF_JMP | BPF_EXIT:
	case BPF_JMP | BPF_TAIL_CALL:
		break;
	default:
		*rd = bpf_to_rv_reg(insn->dst_reg, ctx);
	}

	if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) ||
	    code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) ||
	    code & BPF_LDX || code & BPF_STX)
		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
}

static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit_mv(RV_REG_T2, *rd, ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit_mv(RV_REG_T1, *rs, ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
{
	emit_addiw(RV_REG_T2, *rd, 0, ctx);
	emit_addiw(RV_REG_T1, *rs, 0, ctx);
	*rd = RV_REG_T2;
	*rs = RV_REG_T1;
}

static void emit_zext_32_rd_t1(u8 *rd, struct rv_jit_context *ctx)
{
	emit_mv(RV_REG_T2, *rd, ctx);
	emit_zext_32(RV_REG_T2, ctx);
	emit_zext_32(RV_REG_T1, ctx);
	*rd = RV_REG_T2;
}

static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
{
	emit_addiw(RV_REG_T2, *rd, 0, ctx);
	*rd = RV_REG_T2;
}

static int emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
			      struct rv_jit_context *ctx)
{
	s64 upper, lower;

	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
		emit(rv_jal(rd, rvoff >> 1), ctx);
		return 0;
	} else if (in_auipc_jalr_range(rvoff)) {
		upper = (rvoff + (1 << 11)) >> 12;
		lower = rvoff & 0xfff;
		emit(rv_auipc(RV_REG_T1, upper), ctx);
		emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
		return 0;
	}

	pr_err("bpf-jit: target offset 0x%llx is out of range\n", rvoff);
	return -ERANGE;
}

static bool is_signed_bpf_cond(u8 cond)
{
	return cond == BPF_JSGT || cond == BPF_JSLT ||
		cond == BPF_JSGE || cond == BPF_JSLE;
}

static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
{
	s64 off = 0;
	u64 ip;
	u8 rd;
	int ret;

	if (addr && ctx->insns) {
		ip = (u64)(long)(ctx->insns + ctx->ninsns);
		off = addr - ip;
	}

	ret = emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
	if (ret)
		return ret;
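	/* The RISC-V calling convention returns in a0, while BPF's return
	 * register R0 maps to a5 (see regmap), so copy the result over.
	 */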
	rd = bpf_to_rv_reg(BPF_REG_0, ctx);
	emit_mv(rd, RV_REG_A0, ctx);
	return 0;
}

int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
		      bool extra_pass)
{
	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
		    BPF_CLASS(insn->code) == BPF_JMP;
	int s, e, rvoff, ret, i = insn - ctx->prog->insnsi;
	struct bpf_prog_aux *aux = ctx->prog->aux;
	u8 rd = -1, rs = -1, code = insn->code;
	s16 off = insn->off;
	s32 imm = insn->imm;

	init_regs(&rd, &rs, insn, ctx);

	switch (code) {
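	/* BPF ALU32 ops must leave the upper 32 bits of the destination
	 * zeroed. Unless the verifier has already inserted explicit
	 * zero-extensions (aux->verifier_zext), the JIT appends one after
	 * every 32-bit operation below.
	 */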
	/* dst = src */
	case BPF_ALU | BPF_MOV | BPF_X:
	case BPF_ALU64 | BPF_MOV | BPF_X:
		if (imm == 1) {
			/* Special mov32 for zext */
			emit_zext_32(rd, ctx);
			break;
		}
		emit_mv(rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = dst OP src */
	case BPF_ALU | BPF_ADD | BPF_X:
	case BPF_ALU64 | BPF_ADD | BPF_X:
		emit_add(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_X:
	case BPF_ALU64 | BPF_SUB | BPF_X:
		if (is64)
			emit_sub(rd, rd, rs, ctx);
		else
			emit_subw(rd, rd, rs, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_X:
	case BPF_ALU64 | BPF_AND | BPF_X:
		emit_and(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_X:
	case BPF_ALU64 | BPF_OR | BPF_X:
		emit_or(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_X:
	case BPF_ALU64 | BPF_XOR | BPF_X:
		emit_xor(rd, rd, rs, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_X:
	case BPF_ALU64 | BPF_MUL | BPF_X:
		emit(is64 ? rv_mul(rd, rd, rs) : rv_mulw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_X:
	case BPF_ALU64 | BPF_DIV | BPF_X:
		emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_X:
	case BPF_ALU64 | BPF_MOD | BPF_X:
		emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_X:
	case BPF_ALU64 | BPF_LSH | BPF_X:
		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_X:
	case BPF_ALU64 | BPF_RSH | BPF_X:
		emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_X:
	case BPF_ALU64 | BPF_ARSH | BPF_X:
		emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = -dst */
	case BPF_ALU | BPF_NEG:
	case BPF_ALU64 | BPF_NEG:
		emit_sub(rd, RV_REG_ZERO, rd, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = BSWAP##imm(dst) */
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16:
			emit_slli(rd, rd, 48, ctx);
			emit_srli(rd, rd, 48, ctx);
			break;
		case 32:
			if (!aux->verifier_zext)
				emit_zext_32(rd, ctx);
			break;
		case 64:
			/* Do nothing */
			break;
		}
		break;

	case BPF_ALU | BPF_END | BPF_FROM_BE:
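		/* Base RV64G has no byte-swap instruction (rev8 belongs to
		 * the later Zbb extension), so swap byte by byte: peel the
		 * low byte of rd into t1, accumulate it into t2 with a left
		 * shift, and shift rd right, for 16/32/64 bits' worth of
		 * bytes depending on imm.
		 */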
		emit_li(RV_REG_T2, 0, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
		if (imm == 16)
			goto out_be;

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
		if (imm == 32)
			goto out_be;

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);

		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);
		emit_slli(RV_REG_T2, RV_REG_T2, 8, ctx);
		emit_srli(rd, rd, 8, ctx);
out_be:
		emit_andi(RV_REG_T1, rd, 0xff, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, RV_REG_T1, ctx);

		emit_mv(rd, RV_REG_T2, ctx);
		break;

	/* dst = imm */
	case BPF_ALU | BPF_MOV | BPF_K:
	case BPF_ALU64 | BPF_MOV | BPF_K:
		emit_imm(rd, imm, ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* dst = dst OP imm */
	case BPF_ALU | BPF_ADD | BPF_K:
	case BPF_ALU64 | BPF_ADD | BPF_K:
		if (is_12b_int(imm)) {
			emit_addi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_add(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_SUB | BPF_K:
	case BPF_ALU64 | BPF_SUB | BPF_K:
		if (is_12b_int(-imm)) {
			emit_addi(rd, rd, -imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_sub(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_AND | BPF_K:
	case BPF_ALU64 | BPF_AND | BPF_K:
		if (is_12b_int(imm)) {
			emit_andi(rd, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_OR | BPF_K:
	case BPF_ALU64 | BPF_OR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_ori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_or(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_XOR | BPF_K:
	case BPF_ALU64 | BPF_XOR | BPF_K:
		if (is_12b_int(imm)) {
			emit(rv_xori(rd, rd, imm), ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_xor(rd, rd, RV_REG_T1, ctx);
		}
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MUL | BPF_K:
	case BPF_ALU64 | BPF_MUL | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_mul(rd, rd, RV_REG_T1) :
		     rv_mulw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_DIV | BPF_K:
	case BPF_ALU64 | BPF_DIV | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_divu(rd, rd, RV_REG_T1) :
		     rv_divuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_MOD | BPF_K:
	case BPF_ALU64 | BPF_MOD | BPF_K:
		emit_imm(RV_REG_T1, imm, ctx);
		emit(is64 ? rv_remu(rd, rd, RV_REG_T1) :
		     rv_remuw(rd, rd, RV_REG_T1), ctx);
		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_LSH | BPF_K:
	case BPF_ALU64 | BPF_LSH | BPF_K:
		emit_slli(rd, rd, imm, ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_RSH | BPF_K:
	case BPF_ALU64 | BPF_RSH | BPF_K:
		if (is64)
			emit_srli(rd, rd, imm, ctx);
		else
			emit(rv_srliw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K:
	case BPF_ALU64 | BPF_ARSH | BPF_K:
		if (is64)
			emit_srai(rd, rd, imm, ctx);
		else
			emit(rv_sraiw(rd, rd, imm), ctx);

		if (!is64 && !aux->verifier_zext)
			emit_zext_32(rd, ctx);
		break;

	/* JUMP off */
	case BPF_JMP | BPF_JA:
		rvoff = rv_offset(i, off, ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* IF (dst COND src) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_X:
	case BPF_JMP32 | BPF_JEQ | BPF_X:
	case BPF_JMP | BPF_JGT | BPF_X:
	case BPF_JMP32 | BPF_JGT | BPF_X:
	case BPF_JMP | BPF_JLT | BPF_X:
	case BPF_JMP32 | BPF_JLT | BPF_X:
	case BPF_JMP | BPF_JGE | BPF_X:
	case BPF_JMP32 | BPF_JGE | BPF_X:
	case BPF_JMP | BPF_JLE | BPF_X:
	case BPF_JMP32 | BPF_JLE | BPF_X:
	case BPF_JMP | BPF_JNE | BPF_X:
	case BPF_JMP32 | BPF_JNE | BPF_X:
	case BPF_JMP | BPF_JSGT | BPF_X:
	case BPF_JMP32 | BPF_JSGT | BPF_X:
	case BPF_JMP | BPF_JSLT | BPF_X:
	case BPF_JMP32 | BPF_JSLT | BPF_X:
	case BPF_JMP | BPF_JSGE | BPF_X:
	case BPF_JMP32 | BPF_JSGE | BPF_X:
	case BPF_JMP | BPF_JSLE | BPF_X:
	case BPF_JMP32 | BPF_JSLE | BPF_X:
	case BPF_JMP | BPF_JSET | BPF_X:
	case BPF_JMP32 | BPF_JSET | BPF_X:
		rvoff = rv_offset(i, off, ctx);
		if (!is64) {
			s = ctx->ninsns;
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd_rs(&rd, &rs, ctx);
			else
				emit_zext_32_rd_rs(&rd, &rs, ctx);
			e = ctx->ninsns;

			/* Adjust for extra insns */
			rvoff -= ninsns_rvoff(e - s);
		}

		if (BPF_OP(code) == BPF_JSET) {
			/* Adjust for and */
			rvoff -= 4;
			emit_and(RV_REG_T1, rd, rs, ctx);
			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
				    ctx);
		} else {
			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		}
		break;

	/* IF (dst COND imm) JUMP off */
	case BPF_JMP | BPF_JEQ | BPF_K:
	case BPF_JMP32 | BPF_JEQ | BPF_K:
	case BPF_JMP | BPF_JGT | BPF_K:
	case BPF_JMP32 | BPF_JGT | BPF_K:
	case BPF_JMP | BPF_JLT | BPF_K:
	case BPF_JMP32 | BPF_JLT | BPF_K:
	case BPF_JMP | BPF_JGE | BPF_K:
	case BPF_JMP32 | BPF_JGE | BPF_K:
	case BPF_JMP | BPF_JLE | BPF_K:
	case BPF_JMP32 | BPF_JLE | BPF_K:
	case BPF_JMP | BPF_JNE | BPF_K:
	case BPF_JMP32 | BPF_JNE | BPF_K:
	case BPF_JMP | BPF_JSGT | BPF_K:
	case BPF_JMP32 | BPF_JSGT | BPF_K:
	case BPF_JMP | BPF_JSLT | BPF_K:
	case BPF_JMP32 | BPF_JSLT | BPF_K:
	case BPF_JMP | BPF_JSGE | BPF_K:
	case BPF_JMP32 | BPF_JSGE | BPF_K:
	case BPF_JMP | BPF_JSLE | BPF_K:
	case BPF_JMP32 | BPF_JSLE | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (imm) {
			emit_imm(RV_REG_T1, imm, ctx);
			rs = RV_REG_T1;
		} else {
			/* If imm is 0, simply use zero register. */
			rs = RV_REG_ZERO;
		}
		if (!is64) {
			if (is_signed_bpf_cond(BPF_OP(code)))
				emit_sext_32_rd(&rd, ctx);
			else
				emit_zext_32_rd_t1(&rd, ctx);
		}
		e = ctx->ninsns;

		/* Adjust for extra insns */
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
		break;

	case BPF_JMP | BPF_JSET | BPF_K:
	case BPF_JMP32 | BPF_JSET | BPF_K:
		rvoff = rv_offset(i, off, ctx);
		s = ctx->ninsns;
		if (is_12b_int(imm)) {
			emit_andi(RV_REG_T1, rd, imm, ctx);
		} else {
			emit_imm(RV_REG_T1, imm, ctx);
			emit_and(RV_REG_T1, rd, RV_REG_T1, ctx);
		}
		/* For jset32, we should clear the upper 32 bits of t1, but
		 * sign-extension is sufficient here and saves one instruction,
		 * as t1 is used only in comparison against zero.
		 */
		if (!is64 && imm < 0)
			emit_addiw(RV_REG_T1, RV_REG_T1, 0, ctx);
		e = ctx->ninsns;
		rvoff -= ninsns_rvoff(e - s);
		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
		break;

	/* function call */
	case BPF_JMP | BPF_CALL:
	{
		bool fixed;
		u64 addr;

		mark_call(ctx);
		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr,
					    &fixed);
		if (ret < 0)
			return ret;
		ret = emit_call(fixed, addr, ctx);
		if (ret)
			return ret;
		break;
	}
	/* tail call */
	case BPF_JMP | BPF_TAIL_CALL:
		if (emit_bpf_tail_call(i, ctx))
			return -1;
		break;

	/* function return */
	case BPF_JMP | BPF_EXIT:
		if (i == ctx->prog->len - 1)
			break;

		rvoff = epilogue_offset(ctx);
		ret = emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
		if (ret)
			return ret;
		break;

	/* dst = imm64 */
	case BPF_LD | BPF_IMM | BPF_DW:
	{
		struct bpf_insn insn1 = insn[1];
		u64 imm64;

		imm64 = (u64)insn1.imm << 32 | (u32)imm;
		emit_imm(rd, imm64, ctx);
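		/* This BPF instruction occupies two 8-byte instruction
		 * slots; returning 1 tells the JIT loop to skip the second
		 * slot.
		 */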
		return 1;
	}

	/* LDX: dst = *(size *)(src + off) */
	case BPF_LDX | BPF_MEM | BPF_B:
		if (is_12b_int(off)) {
			emit(rv_lbu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lbu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_H:
		if (is_12b_int(off)) {
			emit(rv_lhu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lhu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_W:
		if (is_12b_int(off)) {
			emit(rv_lwu(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit(rv_lwu(rd, 0, RV_REG_T1), ctx);
		if (insn_is_zext(&insn[1]))
			return 1;
		break;
	case BPF_LDX | BPF_MEM | BPF_DW:
		if (is_12b_int(off)) {
			emit_ld(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rs, ctx);
		emit_ld(rd, 0, RV_REG_T1, ctx);
		break;

	/* speculation barrier */
	case BPF_ST | BPF_NOSPEC:
		break;

	/* ST: *(size *)(dst + off) = imm */
	case BPF_ST | BPF_MEM | BPF_B:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sb(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;

	case BPF_ST | BPF_MEM | BPF_H:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit(rv_sh(rd, off, RV_REG_T1), ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_W:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sw(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;
	case BPF_ST | BPF_MEM | BPF_DW:
		emit_imm(RV_REG_T1, imm, ctx);
		if (is_12b_int(off)) {
			emit_sd(rd, off, RV_REG_T1, ctx);
			break;
		}

		emit_imm(RV_REG_T2, off, ctx);
		emit_add(RV_REG_T2, RV_REG_T2, rd, ctx);
		emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx);
		break;

	/* STX: *(size *)(dst + off) = src */
	case BPF_STX | BPF_MEM | BPF_B:
		if (is_12b_int(off)) {
			emit(rv_sb(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit(rv_sb(RV_REG_T1, 0, rs), ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_H:
		if (is_12b_int(off)) {
			emit(rv_sh(rd, off, rs), ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit(rv_sh(RV_REG_T1, 0, rs), ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_W:
		if (is_12b_int(off)) {
			emit_sw(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit_sw(RV_REG_T1, 0, rs, ctx);
		break;
	case BPF_STX | BPF_MEM | BPF_DW:
		if (is_12b_int(off)) {
			emit_sd(rd, off, rs, ctx);
			break;
		}

		emit_imm(RV_REG_T1, off, ctx);
		emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
		emit_sd(RV_REG_T1, 0, rs, ctx);
		break;
	/* STX XADD: lock *(u32 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_W:
	/* STX XADD: lock *(u64 *)(dst + off) += src */
	case BPF_STX | BPF_XADD | BPF_DW:
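		/* A single amoadd.w/amoadd.d implements the locked add; the
		 * old memory value is discarded into x0, and the aq/rl bits
		 * are left clear (no additional ordering is requested).
		 */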
		if (off) {
			if (is_12b_int(off)) {
				emit_addi(RV_REG_T1, rd, off, ctx);
			} else {
				emit_imm(RV_REG_T1, off, ctx);
				emit_add(RV_REG_T1, RV_REG_T1, rd, ctx);
			}

			rd = RV_REG_T1;
		}

		emit(BPF_SIZE(code) == BPF_W ?
		     rv_amoadd_w(RV_REG_ZERO, rs, rd, 0, 0) :
		     rv_amoadd_d(RV_REG_ZERO, rs, rd, 0, 0), ctx);
		break;
	default:
		pr_err("bpf-jit: unknown opcode %02x\n", code);
		return -EINVAL;
	}

	return 0;
}

void bpf_jit_build_prologue(struct rv_jit_context *ctx)
{
	int stack_adjust = 0, store_offset, bpf_stack_adjust;

	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
	if (bpf_stack_adjust)
		mark_fp(ctx);

	if (seen_reg(RV_REG_RA, ctx))
		stack_adjust += 8;
	stack_adjust += 8; /* RV_REG_FP */
	if (seen_reg(RV_REG_S1, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S2, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S3, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S4, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S5, ctx))
		stack_adjust += 8;
	if (seen_reg(RV_REG_S6, ctx))
		stack_adjust += 8;

	stack_adjust = round_up(stack_adjust, 16);
	stack_adjust += bpf_stack_adjust;

	store_offset = stack_adjust - 8;

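	/* Frame layout produced below (offsets relative to the new sp):
	 *
	 *   old sp / new fp -> +------------------------------+
	 *                      | ra              (if seen)    |
	 *                      | fp                           |
	 *                      | s1..s6 + pad    (if seen)    |
	 *   BPF_REG_FP (s5) -> +------------------------------+
	 *                      | BPF program stack            |
	 *            new sp -> +------------------------------+
	 */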
	/* First instruction is always setting the tail-call-counter
	 * (TCC) register. This instruction is skipped for tail calls.
	 * Force using a 4-byte (non-compressed) instruction.
	 */
	emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx);

	emit_addi(RV_REG_SP, RV_REG_SP, -stack_adjust, ctx);

	if (seen_reg(RV_REG_RA, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_RA, ctx);
		store_offset -= 8;
	}
	emit_sd(RV_REG_SP, store_offset, RV_REG_FP, ctx);
	store_offset -= 8;
	if (seen_reg(RV_REG_S1, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S1, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S2, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S2, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S3, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S3, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S4, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S4, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S5, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S5, ctx);
		store_offset -= 8;
	}
	if (seen_reg(RV_REG_S6, ctx)) {
		emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx);
		store_offset -= 8;
	}

	emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx);

	if (bpf_stack_adjust)
		emit_addi(RV_REG_S5, RV_REG_SP, bpf_stack_adjust, ctx);

	/* If the program contains calls and tail calls, RV_REG_TCC needs
	 * to be saved across calls.
	 */
	if (seen_tail_call(ctx) && seen_call(ctx))
		emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx);

	ctx->stack_size = stack_adjust;
}

void bpf_jit_build_epilogue(struct rv_jit_context *ctx)
{
	__build_epilogue(false, ctx);
}

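/* JIT images come from the dedicated BPF_JIT_REGION rather than the
 * generic module area; the region is placed so that generated code can
 * reach kernel symbols with the auipc+jalr pair emitted by emit_call()
 * above.
 */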
void *bpf_jit_alloc_exec(unsigned long size)
{
	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
				    BPF_JIT_REGION_END, GFP_KERNEL,
				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
				    __builtin_return_address(0));
}

void bpf_jit_free_exec(void *addr)
{
	return vfree(addr);
}