xref: /OK3568_Linux_fs/kernel/kernel/bpf/verifier.c (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3  * Copyright (c) 2016 Facebook
4  * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5  */
6 #include <uapi/linux/btf.h>
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/bpf_verifier.h>
13 #include <linux/filter.h>
14 #include <net/netlink.h>
15 #include <linux/file.h>
16 #include <linux/vmalloc.h>
17 #include <linux/stringify.h>
18 #include <linux/bsearch.h>
19 #include <linux/sort.h>
20 #include <linux/perf_event.h>
21 #include <linux/ctype.h>
22 #include <linux/error-injection.h>
23 #include <linux/bpf_lsm.h>
24 #include <linux/btf_ids.h>
25 
26 #include "disasm.h"
27 
28 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
30 	[_id] = & _name ## _verifier_ops,
31 #define BPF_MAP_TYPE(_id, _ops)
32 #define BPF_LINK_TYPE(_id, _name)
33 #include <linux/bpf_types.h>
34 #undef BPF_PROG_TYPE
35 #undef BPF_MAP_TYPE
36 #undef BPF_LINK_TYPE
37 };
38 
39 /* bpf_check() is a static code analyzer that walks eBPF program
40  * instruction by instruction and updates register/stack state.
41  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
42  *
43  * The first pass is depth-first-search to check that the program is a DAG.
44  * It rejects the following programs:
45  * - larger than BPF_MAXINSNS insns
46  * - if loop is present (detected via back-edge)
47  * - unreachable insns exist (shouldn't be a forest. program = one function)
48  * - out of bounds or malformed jumps
49  * The second pass is all possible path descent from the 1st insn.
50  * Since it's analyzing all paths through the program, the length of the
51  * analysis is limited (see BPF_COMPLEXITY_LIMIT_INSNS), which may be hit even if
52  * total number of insn is less than 4K, but there are too many branches that
53  * change stack/regs. Number of 'branches to be analyzed' is limited to 8k
54  *
55  * On entry to each instruction, each register has a type, and the instruction
56  * changes the types of the registers depending on instruction semantics.
57  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
58  * copied to R1.
59  *
60  * All registers are 64-bit.
61  * R0 - return register
62  * R1-R5 argument passing registers
63  * R6-R9 callee saved registers
64  * R10 - frame pointer read-only
65  *
66  * At the start of BPF program the register R1 contains a pointer to bpf_context
67  * and has type PTR_TO_CTX.
68  *
69  * Verifier tracks arithmetic operations on pointers in case:
70  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
71  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
72  * 1st insn copies R10 (which has FRAME_PTR) type into R1
73  * and 2nd arithmetic instruction is pattern matched to recognize
74  * that it wants to construct a pointer to some element within stack.
75  * So after 2nd insn, the register R1 has type PTR_TO_STACK
76  * (and -20 constant is saved for further stack bounds checking).
77  * Meaning that this reg is a pointer to stack plus known immediate constant.
78  *
79  * Most of the time the registers have SCALAR_VALUE type, which
80  * means the register has some value, but it's not a valid pointer.
81  * (like pointer plus pointer becomes SCALAR_VALUE type)
82  *
83  * When verifier sees load or store instructions the type of base register
84  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
85  * four pointer types recognized by check_mem_access() function.
86  *
87  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
88  * and the range of [ptr, ptr + map's value_size) is accessible.
89  *
90  * registers used to pass values to function calls are checked against
91  * function argument constraints.
92  *
93  * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
94  * It means that the register type passed to this function must be
95  * PTR_TO_STACK and it will be used inside the function as
96  * 'pointer to map element key'
97  *
98  * For example the argument constraints for bpf_map_lookup_elem():
99  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
100  *   .arg1_type = ARG_CONST_MAP_PTR,
101  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
102  *
103  * ret_type says that this function returns 'pointer to map elem value or null',
104  * the function expects 1st argument to be a const pointer to 'struct bpf_map' and
105  * 2nd argument should be a pointer to stack, which will be used inside
106  * the helper function as a pointer to map element key.
107  *
108  * On the kernel side the helper function looks like:
109  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
110  * {
111  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
112  *    void *key = (void *) (unsigned long) r2;
113  *    void *value;
114  *
115  *    here kernel can access 'key' and 'map' pointers safely, knowing that
116  *    [key, key + map->key_size) bytes are valid and were initialized on
117  *    the stack of eBPF program.
118  * }
119  *
120  * Corresponding eBPF program may look like:
121  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
122  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
123  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
124  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
125  * here verifier looks at prototype of map_lookup_elem() and sees:
126  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
127  * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
128  *
129  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
130  * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
131  * and were initialized prior to this call.
132  * If it's ok, then verifier allows this BPF_CALL insn and looks at
133  * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
134  * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
135  * returns either a pointer to map value or NULL.
136  *
137  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
138  * insn, the register holding that pointer in the true branch changes state to
139  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
140  * branch. See check_cond_jmp_op().
141  *
142  * After the call R0 is set to return type of the function and registers R1-R5
143  * are set to NOT_INIT to indicate that they are no longer readable.
144  *
145  * The following reference types represent a potential reference to a kernel
146  * resource which, after first being allocated, must be checked and freed by
147  * the BPF program:
148  * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
149  *
150  * When the verifier sees a helper call return a reference type, it allocates a
151  * pointer id for the reference and stores it in the current function state.
152  * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
153  * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
154  * passes through a NULL-check conditional. For the branch wherein the state is
155  * changed to CONST_IMM, the verifier releases the reference.
156  *
157  * For each helper function that allocates a reference, such as
158  * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
159  * bpf_sk_release(). When a reference type passes into the release function,
160  * the verifier also releases the reference. If any unchecked or unreleased
161  * reference remains at the end of the program, the verifier rejects it.
162  */
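/* For example (an illustrative sketch in the same spirit as the examples
 * above, not a program taken from the tree), acquiring and releasing a
 * socket reference may look like:
 *    BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),    // zero a bpf_sock_tuple
 *    BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),     //   on the stack
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),    // R2 = &tuple (PTR_TO_STACK)
 *    BPF_MOV64_IMM(BPF_REG_3, 12),              // R3 = IPv4 tuple size
 *    BPF_MOV64_IMM(BPF_REG_4, -1),              // R4 = BPF_F_CURRENT_NETNS
 *    BPF_MOV64_IMM(BPF_REG_5, 0),               // R5 = flags
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),     // NULL check on R0
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *    BPF_EXIT_INSN(),
 * R1, untouched above, still carries the program's context for arg1 of the
 * lookup. The lookup sets R0 to PTR_TO_SOCKET_OR_NULL and records a reference
 * id. The NULL check converts it to PTR_TO_SOCKET on the fall-through path,
 * and passing it as R1 to bpf_sk_release() releases the reference. Any path
 * that can reach exit with the reference still held is rejected.
 */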
163 
164 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
165 struct bpf_verifier_stack_elem {
166 	/* verifier state is 'st'
167 	 * before processing instruction 'insn_idx'
168 	 * and after processing instruction 'prev_insn_idx'
169 	 */
170 	struct bpf_verifier_state st;
171 	int insn_idx;
172 	int prev_insn_idx;
173 	struct bpf_verifier_stack_elem *next;
174 	/* length of verifier log at the time this state was pushed on stack */
175 	u32 log_pos;
176 };
177 
178 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
179 #define BPF_COMPLEXITY_LIMIT_STATES	64
180 
181 #define BPF_MAP_KEY_POISON	(1ULL << 63)
182 #define BPF_MAP_KEY_SEEN	(1ULL << 62)
183 
184 #define BPF_MAP_PTR_UNPRIV	1UL
185 #define BPF_MAP_PTR_POISON	((void *)((0xeB9FUL << 1) +	\
186 					  POISON_POINTER_DELTA))
187 #define BPF_MAP_PTR(X)		((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
188 
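/* map_ptr_state stores the bpf_map pointer seen at a call instruction with
 * the BPF_MAP_PTR_UNPRIV flag packed into its low bit.  BPF_MAP_PTR_POISON
 * marks instructions for which no single constant map can be relied upon.
 */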
189 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
190 {
191 	return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
192 }
193 
194 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
195 {
196 	return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
197 }
198 
199 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux,
200 			      const struct bpf_map *map, bool unpriv)
201 {
202 	BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
203 	unpriv |= bpf_map_ptr_unpriv(aux);
204 	aux->map_ptr_state = (unsigned long)map |
205 			     (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
206 }
207 
208 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
209 {
210 	return aux->map_key_state & BPF_MAP_KEY_POISON;
211 }
212 
213 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
214 {
215 	return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
216 }
217 
218 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
219 {
220 	return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
221 }
222 
223 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
224 {
225 	bool poisoned = bpf_map_key_poisoned(aux);
226 
227 	aux->map_key_state = state | BPF_MAP_KEY_SEEN |
228 			     (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
229 }
230 
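/* Scratch state gathered while checking the arguments of a single helper
 * call and then consumed when modelling the helper's effect on registers.
 */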
231 struct bpf_call_arg_meta {
232 	struct bpf_map *map_ptr;
233 	bool raw_mode;
234 	bool pkt_access;
235 	int regno;
236 	int access_size;
237 	int mem_size;
238 	u64 msize_max_value;
239 	int ref_obj_id;
240 	int func_id;
241 	u32 btf_id;
242 	u32 ret_btf_id;
243 };
244 
245 struct btf *btf_vmlinux;
246 
247 static DEFINE_MUTEX(bpf_verifier_lock);
248 
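/* Return the bpf_line_info covering instruction 'insn_off', i.e. the last
 * entry whose insn_off is <= the requested offset, or NULL if there is none.
 */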
249 static const struct bpf_line_info *
250 find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
251 {
252 	const struct bpf_line_info *linfo;
253 	const struct bpf_prog *prog;
254 	u32 i, nr_linfo;
255 
256 	prog = env->prog;
257 	nr_linfo = prog->aux->nr_linfo;
258 
259 	if (!nr_linfo || insn_off >= prog->len)
260 		return NULL;
261 
262 	linfo = prog->aux->linfo;
263 	for (i = 1; i < nr_linfo; i++)
264 		if (insn_off < linfo[i].insn_off)
265 			break;
266 
267 	return &linfo[i - 1];
268 }
269 
270 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
271 		       va_list args)
272 {
273 	unsigned int n;
274 
275 	n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
276 
277 	WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1,
278 		  "verifier log line truncated - local buffer too short\n");
279 
280 	n = min(log->len_total - log->len_used - 1, n);
281 	log->kbuf[n] = '\0';
282 
283 	if (log->level == BPF_LOG_KERNEL) {
284 		pr_err("BPF:%s\n", log->kbuf);
285 		return;
286 	}
287 	if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1))
288 		log->len_used += n;
289 	else
290 		log->ubuf = NULL;
291 }
292 
293 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
294 {
295 	char zero = 0;
296 
297 	if (!bpf_verifier_log_needed(log))
298 		return;
299 
300 	log->len_used = new_pos;
301 	if (put_user(zero, log->ubuf + new_pos))
302 		log->ubuf = NULL;
303 }
304 
305 /* log_level controls verbosity level of eBPF verifier.
306  * bpf_verifier_log_write() is used to dump the verification trace to the log,
307  * so the user can figure out what's wrong with the program
308  */
309 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
310 					   const char *fmt, ...)
311 {
312 	va_list args;
313 
314 	if (!bpf_verifier_log_needed(&env->log))
315 		return;
316 
317 	va_start(args, fmt);
318 	bpf_verifier_vlog(&env->log, fmt, args);
319 	va_end(args);
320 }
321 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
322 
323 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
324 {
325 	struct bpf_verifier_env *env = private_data;
326 	va_list args;
327 
328 	if (!bpf_verifier_log_needed(&env->log))
329 		return;
330 
331 	va_start(args, fmt);
332 	bpf_verifier_vlog(&env->log, fmt, args);
333 	va_end(args);
334 }
335 
336 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log,
337 			    const char *fmt, ...)
338 {
339 	va_list args;
340 
341 	if (!bpf_verifier_log_needed(log))
342 		return;
343 
344 	va_start(args, fmt);
345 	bpf_verifier_vlog(log, fmt, args);
346 	va_end(args);
347 }
348 
349 static const char *ltrim(const char *s)
350 {
351 	while (isspace(*s))
352 		s++;
353 
354 	return s;
355 }
356 
357 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env,
358 					 u32 insn_off,
359 					 const char *prefix_fmt, ...)
360 {
361 	const struct bpf_line_info *linfo;
362 
363 	if (!bpf_verifier_log_needed(&env->log))
364 		return;
365 
366 	linfo = find_linfo(env, insn_off);
367 	if (!linfo || linfo == env->prev_linfo)
368 		return;
369 
370 	if (prefix_fmt) {
371 		va_list args;
372 
373 		va_start(args, prefix_fmt);
374 		bpf_verifier_vlog(&env->log, prefix_fmt, args);
375 		va_end(args);
376 	}
377 
378 	verbose(env, "%s\n",
379 		ltrim(btf_name_by_offset(env->prog->aux->btf,
380 					 linfo->line_off)));
381 
382 	env->prev_linfo = linfo;
383 }
384 
385 static bool type_is_pkt_pointer(enum bpf_reg_type type)
386 {
387 	return type == PTR_TO_PACKET ||
388 	       type == PTR_TO_PACKET_META;
389 }
390 
391 static bool type_is_sk_pointer(enum bpf_reg_type type)
392 {
393 	return type == PTR_TO_SOCKET ||
394 		type == PTR_TO_SOCK_COMMON ||
395 		type == PTR_TO_TCP_SOCK ||
396 		type == PTR_TO_XDP_SOCK;
397 }
398 
399 static bool reg_type_not_null(enum bpf_reg_type type)
400 {
401 	return type == PTR_TO_SOCKET ||
402 		type == PTR_TO_TCP_SOCK ||
403 		type == PTR_TO_MAP_VALUE ||
404 		type == PTR_TO_SOCK_COMMON;
405 }
406 
407 static bool reg_type_may_be_null(enum bpf_reg_type type)
408 {
409 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
410 	       type == PTR_TO_SOCKET_OR_NULL ||
411 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
412 	       type == PTR_TO_TCP_SOCK_OR_NULL ||
413 	       type == PTR_TO_BTF_ID_OR_NULL ||
414 	       type == PTR_TO_MEM_OR_NULL ||
415 	       type == PTR_TO_RDONLY_BUF_OR_NULL ||
416 	       type == PTR_TO_RDWR_BUF_OR_NULL;
417 }
418 
419 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
420 {
421 	return reg->type == PTR_TO_MAP_VALUE &&
422 		map_value_has_spin_lock(reg->map_ptr);
423 }
424 
425 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
426 {
427 	return type == PTR_TO_SOCKET ||
428 		type == PTR_TO_SOCKET_OR_NULL ||
429 		type == PTR_TO_TCP_SOCK ||
430 		type == PTR_TO_TCP_SOCK_OR_NULL ||
431 		type == PTR_TO_MEM ||
432 		type == PTR_TO_MEM_OR_NULL;
433 }
434 
435 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
436 {
437 	return type == ARG_PTR_TO_SOCK_COMMON;
438 }
439 
440 static bool arg_type_may_be_null(enum bpf_arg_type type)
441 {
442 	return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
443 	       type == ARG_PTR_TO_MEM_OR_NULL ||
444 	       type == ARG_PTR_TO_CTX_OR_NULL ||
445 	       type == ARG_PTR_TO_SOCKET_OR_NULL ||
446 	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
447 }
448 
449 /* Determine whether the function releases some resources allocated by another
450  * function call. The first reference type argument will be assumed to be
451  * released by release_reference().
452  */
453 static bool is_release_function(enum bpf_func_id func_id)
454 {
455 	return func_id == BPF_FUNC_sk_release ||
456 	       func_id == BPF_FUNC_ringbuf_submit ||
457 	       func_id == BPF_FUNC_ringbuf_discard;
458 }
459 
460 static bool may_be_acquire_function(enum bpf_func_id func_id)
461 {
462 	return func_id == BPF_FUNC_sk_lookup_tcp ||
463 		func_id == BPF_FUNC_sk_lookup_udp ||
464 		func_id == BPF_FUNC_skc_lookup_tcp ||
465 		func_id == BPF_FUNC_map_lookup_elem ||
466 	        func_id == BPF_FUNC_ringbuf_reserve;
467 }
468 
469 static bool is_acquire_function(enum bpf_func_id func_id,
470 				const struct bpf_map *map)
471 {
472 	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
473 
474 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
475 	    func_id == BPF_FUNC_sk_lookup_udp ||
476 	    func_id == BPF_FUNC_skc_lookup_tcp ||
477 	    func_id == BPF_FUNC_ringbuf_reserve)
478 		return true;
479 
480 	if (func_id == BPF_FUNC_map_lookup_elem &&
481 	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
482 	     map_type == BPF_MAP_TYPE_SOCKHASH))
483 		return true;
484 
485 	return false;
486 }
487 
488 static bool is_ptr_cast_function(enum bpf_func_id func_id)
489 {
490 	return func_id == BPF_FUNC_tcp_sock ||
491 		func_id == BPF_FUNC_sk_fullsock ||
492 		func_id == BPF_FUNC_skc_to_tcp_sock ||
493 		func_id == BPF_FUNC_skc_to_tcp6_sock ||
494 		func_id == BPF_FUNC_skc_to_udp6_sock ||
495 		func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
496 		func_id == BPF_FUNC_skc_to_tcp_request_sock;
497 }
498 
499 /* string representation of 'enum bpf_reg_type' */
500 static const char * const reg_type_str[] = {
501 	[NOT_INIT]		= "?",
502 	[SCALAR_VALUE]		= "inv",
503 	[PTR_TO_CTX]		= "ctx",
504 	[CONST_PTR_TO_MAP]	= "map_ptr",
505 	[PTR_TO_MAP_VALUE]	= "map_value",
506 	[PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
507 	[PTR_TO_STACK]		= "fp",
508 	[PTR_TO_PACKET]		= "pkt",
509 	[PTR_TO_PACKET_META]	= "pkt_meta",
510 	[PTR_TO_PACKET_END]	= "pkt_end",
511 	[PTR_TO_FLOW_KEYS]	= "flow_keys",
512 	[PTR_TO_SOCKET]		= "sock",
513 	[PTR_TO_SOCKET_OR_NULL] = "sock_or_null",
514 	[PTR_TO_SOCK_COMMON]	= "sock_common",
515 	[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
516 	[PTR_TO_TCP_SOCK]	= "tcp_sock",
517 	[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
518 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
519 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
520 	[PTR_TO_BTF_ID]		= "ptr_",
521 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
522 	[PTR_TO_PERCPU_BTF_ID]	= "percpu_ptr_",
523 	[PTR_TO_MEM]		= "mem",
524 	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
525 	[PTR_TO_RDONLY_BUF]	= "rdonly_buf",
526 	[PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null",
527 	[PTR_TO_RDWR_BUF]	= "rdwr_buf",
528 	[PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null",
529 };
530 
531 static char slot_type_char[] = {
532 	[STACK_INVALID]	= '?',
533 	[STACK_SPILL]	= 'r',
534 	[STACK_MISC]	= 'm',
535 	[STACK_ZERO]	= '0',
536 };
537 
538 static void print_liveness(struct bpf_verifier_env *env,
539 			   enum bpf_reg_liveness live)
540 {
541 	if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE))
542 	    verbose(env, "_");
543 	if (live & REG_LIVE_READ)
544 		verbose(env, "r");
545 	if (live & REG_LIVE_WRITTEN)
546 		verbose(env, "w");
547 	if (live & REG_LIVE_DONE)
548 		verbose(env, "D");
549 }
550 
551 static struct bpf_func_state *func(struct bpf_verifier_env *env,
552 				   const struct bpf_reg_state *reg)
553 {
554 	struct bpf_verifier_state *cur = env->cur_state;
555 
556 	return cur->frame[reg->frameno];
557 }
558 
559 const char *kernel_type_name(u32 id)
560 {
561 	return btf_name_by_offset(btf_vmlinux,
562 				  btf_type_by_id(btf_vmlinux, id)->name_off);
563 }
564 
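/* Dump the register and stack slot state of one function frame to the
 * verifier log, including liveness marks and the tracked value bounds.
 */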
565 static void print_verifier_state(struct bpf_verifier_env *env,
566 				 const struct bpf_func_state *state)
567 {
568 	const struct bpf_reg_state *reg;
569 	enum bpf_reg_type t;
570 	int i;
571 
572 	if (state->frameno)
573 		verbose(env, " frame%d:", state->frameno);
574 	for (i = 0; i < MAX_BPF_REG; i++) {
575 		reg = &state->regs[i];
576 		t = reg->type;
577 		if (t == NOT_INIT)
578 			continue;
579 		verbose(env, " R%d", i);
580 		print_liveness(env, reg->live);
581 		verbose(env, "=%s", reg_type_str[t]);
582 		if (t == SCALAR_VALUE && reg->precise)
583 			verbose(env, "P");
584 		if ((t == SCALAR_VALUE || t == PTR_TO_STACK) &&
585 		    tnum_is_const(reg->var_off)) {
586 			/* reg->off should be 0 for SCALAR_VALUE */
587 			verbose(env, "%lld", reg->var_off.value + reg->off);
588 		} else {
589 			if (t == PTR_TO_BTF_ID ||
590 			    t == PTR_TO_BTF_ID_OR_NULL ||
591 			    t == PTR_TO_PERCPU_BTF_ID)
592 				verbose(env, "%s", kernel_type_name(reg->btf_id));
593 			verbose(env, "(id=%d", reg->id);
594 			if (reg_type_may_be_refcounted_or_null(t))
595 				verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
596 			if (t != SCALAR_VALUE)
597 				verbose(env, ",off=%d", reg->off);
598 			if (type_is_pkt_pointer(t))
599 				verbose(env, ",r=%d", reg->range);
600 			else if (t == CONST_PTR_TO_MAP ||
601 				 t == PTR_TO_MAP_VALUE ||
602 				 t == PTR_TO_MAP_VALUE_OR_NULL)
603 				verbose(env, ",ks=%d,vs=%d",
604 					reg->map_ptr->key_size,
605 					reg->map_ptr->value_size);
606 			if (tnum_is_const(reg->var_off)) {
607 				/* Typically an immediate SCALAR_VALUE, but
608 				 * could be a pointer whose offset is too big
609 				 * for reg->off
610 				 */
611 				verbose(env, ",imm=%llx", reg->var_off.value);
612 			} else {
613 				if (reg->smin_value != reg->umin_value &&
614 				    reg->smin_value != S64_MIN)
615 					verbose(env, ",smin_value=%lld",
616 						(long long)reg->smin_value);
617 				if (reg->smax_value != reg->umax_value &&
618 				    reg->smax_value != S64_MAX)
619 					verbose(env, ",smax_value=%lld",
620 						(long long)reg->smax_value);
621 				if (reg->umin_value != 0)
622 					verbose(env, ",umin_value=%llu",
623 						(unsigned long long)reg->umin_value);
624 				if (reg->umax_value != U64_MAX)
625 					verbose(env, ",umax_value=%llu",
626 						(unsigned long long)reg->umax_value);
627 				if (!tnum_is_unknown(reg->var_off)) {
628 					char tn_buf[48];
629 
630 					tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
631 					verbose(env, ",var_off=%s", tn_buf);
632 				}
633 				if (reg->s32_min_value != reg->smin_value &&
634 				    reg->s32_min_value != S32_MIN)
635 					verbose(env, ",s32_min_value=%d",
636 						(int)(reg->s32_min_value));
637 				if (reg->s32_max_value != reg->smax_value &&
638 				    reg->s32_max_value != S32_MAX)
639 					verbose(env, ",s32_max_value=%d",
640 						(int)(reg->s32_max_value));
641 				if (reg->u32_min_value != reg->umin_value &&
642 				    reg->u32_min_value != U32_MIN)
643 					verbose(env, ",u32_min_value=%d",
644 						(int)(reg->u32_min_value));
645 				if (reg->u32_max_value != reg->umax_value &&
646 				    reg->u32_max_value != U32_MAX)
647 					verbose(env, ",u32_max_value=%d",
648 						(int)(reg->u32_max_value));
649 			}
650 			verbose(env, ")");
651 		}
652 	}
653 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
654 		char types_buf[BPF_REG_SIZE + 1];
655 		bool valid = false;
656 		int j;
657 
658 		for (j = 0; j < BPF_REG_SIZE; j++) {
659 			if (state->stack[i].slot_type[j] != STACK_INVALID)
660 				valid = true;
661 			types_buf[j] = slot_type_char[
662 					state->stack[i].slot_type[j]];
663 		}
664 		types_buf[BPF_REG_SIZE] = 0;
665 		if (!valid)
666 			continue;
667 		verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
668 		print_liveness(env, state->stack[i].spilled_ptr.live);
669 		if (state->stack[i].slot_type[0] == STACK_SPILL) {
670 			reg = &state->stack[i].spilled_ptr;
671 			t = reg->type;
672 			verbose(env, "=%s", reg_type_str[t]);
673 			if (t == SCALAR_VALUE && reg->precise)
674 				verbose(env, "P");
675 			if (t == SCALAR_VALUE && tnum_is_const(reg->var_off))
676 				verbose(env, "%lld", reg->var_off.value + reg->off);
677 		} else {
678 			verbose(env, "=%s", types_buf);
679 		}
680 	}
681 	if (state->acquired_refs && state->refs[0].id) {
682 		verbose(env, " refs=%d", state->refs[0].id);
683 		for (i = 1; i < state->acquired_refs; i++)
684 			if (state->refs[i].id)
685 				verbose(env, ",%d", state->refs[i].id);
686 	}
687 	verbose(env, "\n");
688 }
689 
690 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)				\
691 static int copy_##NAME##_state(struct bpf_func_state *dst,		\
692 			       const struct bpf_func_state *src)	\
693 {									\
694 	if (!src->FIELD)						\
695 		return 0;						\
696 	if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {			\
697 		/* internal bug, make state invalid to reject the program */ \
698 		memset(dst, 0, sizeof(*dst));				\
699 		return -EFAULT;						\
700 	}								\
701 	memcpy(dst->FIELD, src->FIELD,					\
702 	       sizeof(*src->FIELD) * (src->COUNT / SIZE));		\
703 	return 0;							\
704 }
705 /* copy_reference_state() */
706 COPY_STATE_FN(reference, acquired_refs, refs, 1)
707 /* copy_stack_state() */
708 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
709 #undef COPY_STATE_FN
710 
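/* Resize state->FIELD to hold 'size / SIZE' elements.  With copy_old the
 * existing entries are preserved and the newly added tail is zeroed; without
 * copy_old the previous contents are not kept: the count may shrink and the
 * array is freed entirely when size == 0.
 */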
711 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)			\
712 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, \
713 				  bool copy_old)			\
714 {									\
715 	u32 old_size = state->COUNT;					\
716 	struct bpf_##NAME##_state *new_##FIELD;				\
717 	int slot = size / SIZE;						\
718 									\
719 	if (size <= old_size || !size) {				\
720 		if (copy_old)						\
721 			return 0;					\
722 		state->COUNT = slot * SIZE;				\
723 		if (!size && old_size) {				\
724 			kfree(state->FIELD);				\
725 			state->FIELD = NULL;				\
726 		}							\
727 		return 0;						\
728 	}								\
729 	new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), \
730 				    GFP_KERNEL);			\
731 	if (!new_##FIELD)						\
732 		return -ENOMEM;						\
733 	if (copy_old) {							\
734 		if (state->FIELD)					\
735 			memcpy(new_##FIELD, state->FIELD,		\
736 			       sizeof(*new_##FIELD) * (old_size / SIZE)); \
737 		memset(new_##FIELD + old_size / SIZE, 0,		\
738 		       sizeof(*new_##FIELD) * (size - old_size) / SIZE); \
739 	}								\
740 	state->COUNT = slot * SIZE;					\
741 	kfree(state->FIELD);						\
742 	state->FIELD = new_##FIELD;					\
743 	return 0;							\
744 }
745 /* realloc_reference_state() */
746 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
747 /* realloc_stack_state() */
748 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
749 #undef REALLOC_STATE_FN
750 
751 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
752  * make it consume a minimal amount of memory. Stack accesses from the program
753  * (via check_stack_write()) call into realloc_func_state() to grow the stack size.
754  * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
755  * which realloc_stack_state() copies over. It points to previous
756  * bpf_verifier_state which is never reallocated.
757  */
758 static int realloc_func_state(struct bpf_func_state *state, int stack_size,
759 			      int refs_size, bool copy_old)
760 {
761 	int err = realloc_reference_state(state, refs_size, copy_old);
762 	if (err)
763 		return err;
764 	return realloc_stack_state(state, stack_size, copy_old);
765 }
766 
767 /* Acquire a pointer id from the env and update the state->refs to include
768  * this new pointer reference.
769  * On success, returns a valid pointer id to associate with the register.
770  * On failure, returns a negative errno.
771  */
772 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
773 {
774 	struct bpf_func_state *state = cur_func(env);
775 	int new_ofs = state->acquired_refs;
776 	int id, err;
777 
778 	err = realloc_reference_state(state, state->acquired_refs + 1, true);
779 	if (err)
780 		return err;
781 	id = ++env->id_gen;
782 	state->refs[new_ofs].id = id;
783 	state->refs[new_ofs].insn_idx = insn_idx;
784 
785 	return id;
786 }
787 
788 /* release function corresponding to acquire_reference_state(). Idempotent. */
789 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
790 {
791 	int i, last_idx;
792 
793 	last_idx = state->acquired_refs - 1;
794 	for (i = 0; i < state->acquired_refs; i++) {
795 		if (state->refs[i].id == ptr_id) {
796 			if (last_idx && i != last_idx)
797 				memcpy(&state->refs[i], &state->refs[last_idx],
798 				       sizeof(*state->refs));
799 			memset(&state->refs[last_idx], 0, sizeof(*state->refs));
800 			state->acquired_refs--;
801 			return 0;
802 		}
803 	}
804 	return -EINVAL;
805 }
806 
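/* Copy the set of acquired references from 'src' into 'dst'.  Used when
 * entering and returning from bpf-to-bpf calls so that outstanding reference
 * obligations follow the frame that is currently being verified.
 */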
807 static int transfer_reference_state(struct bpf_func_state *dst,
808 				    struct bpf_func_state *src)
809 {
810 	int err = realloc_reference_state(dst, src->acquired_refs, false);
811 	if (err)
812 		return err;
813 	err = copy_reference_state(dst, src);
814 	if (err)
815 		return err;
816 	return 0;
817 }
818 
819 static void free_func_state(struct bpf_func_state *state)
820 {
821 	if (!state)
822 		return;
823 	kfree(state->refs);
824 	kfree(state->stack);
825 	kfree(state);
826 }
827 
828 static void clear_jmp_history(struct bpf_verifier_state *state)
829 {
830 	kfree(state->jmp_history);
831 	state->jmp_history = NULL;
832 	state->jmp_history_cnt = 0;
833 }
834 
835 static void free_verifier_state(struct bpf_verifier_state *state,
836 				bool free_self)
837 {
838 	int i;
839 
840 	for (i = 0; i <= state->curframe; i++) {
841 		free_func_state(state->frame[i]);
842 		state->frame[i] = NULL;
843 	}
844 	clear_jmp_history(state);
845 	if (free_self)
846 		kfree(state);
847 }
848 
849 /* copy verifier state from src to dst growing dst stack space
850  * when necessary to accommodate larger src stack
851  */
852 static int copy_func_state(struct bpf_func_state *dst,
853 			   const struct bpf_func_state *src)
854 {
855 	int err;
856 
857 	err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs,
858 				 false);
859 	if (err)
860 		return err;
861 	memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
862 	err = copy_reference_state(dst, src);
863 	if (err)
864 		return err;
865 	return copy_stack_state(dst, src);
866 }
867 
868 static int copy_verifier_state(struct bpf_verifier_state *dst_state,
869 			       const struct bpf_verifier_state *src)
870 {
871 	struct bpf_func_state *dst;
872 	u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
873 	int i, err;
874 
875 	if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
876 		kfree(dst_state->jmp_history);
877 		dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
878 		if (!dst_state->jmp_history)
879 			return -ENOMEM;
880 	}
881 	memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
882 	dst_state->jmp_history_cnt = src->jmp_history_cnt;
883 
884 	/* if dst has more stack frames than src, free them */
885 	for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
886 		free_func_state(dst_state->frame[i]);
887 		dst_state->frame[i] = NULL;
888 	}
889 	dst_state->speculative = src->speculative;
890 	dst_state->curframe = src->curframe;
891 	dst_state->active_spin_lock = src->active_spin_lock;
892 	dst_state->branches = src->branches;
893 	dst_state->parent = src->parent;
894 	dst_state->first_insn_idx = src->first_insn_idx;
895 	dst_state->last_insn_idx = src->last_insn_idx;
896 	for (i = 0; i <= src->curframe; i++) {
897 		dst = dst_state->frame[i];
898 		if (!dst) {
899 			dst = kzalloc(sizeof(*dst), GFP_KERNEL);
900 			if (!dst)
901 				return -ENOMEM;
902 			dst_state->frame[i] = dst;
903 		}
904 		err = copy_func_state(dst, src->frame[i]);
905 		if (err)
906 			return err;
907 	}
908 	return 0;
909 }
910 
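/* 'branches' counts how many children of a verifier state are still being
 * explored.  When a path completes, walk up the parent chain decrementing
 * the counters until an ancestor with other unfinished branches is found.
 */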
911 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
912 {
913 	while (st) {
914 		u32 br = --st->branches;
915 
916 		/* WARN_ON(br > 1) technically makes sense here,
917 		 * but see comment in push_stack(), hence:
918 		 */
919 		WARN_ONCE((int)br < 0,
920 			  "BUG update_branch_counts:branches_to_explore=%d\n",
921 			  br);
922 		if (br)
923 			break;
924 		st = st->parent;
925 	}
926 }
927 
928 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
929 		     int *insn_idx, bool pop_log)
930 {
931 	struct bpf_verifier_state *cur = env->cur_state;
932 	struct bpf_verifier_stack_elem *elem, *head = env->head;
933 	int err;
934 
935 	if (env->head == NULL)
936 		return -ENOENT;
937 
938 	if (cur) {
939 		err = copy_verifier_state(cur, &head->st);
940 		if (err)
941 			return err;
942 	}
943 	if (pop_log)
944 		bpf_vlog_reset(&env->log, head->log_pos);
945 	if (insn_idx)
946 		*insn_idx = head->insn_idx;
947 	if (prev_insn_idx)
948 		*prev_insn_idx = head->prev_insn_idx;
949 	elem = head->next;
950 	free_verifier_state(&head->st, false);
951 	kfree(head);
952 	env->head = elem;
953 	env->stack_size--;
954 	return 0;
955 }
956 
957 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
958 					     int insn_idx, int prev_insn_idx,
959 					     bool speculative)
960 {
961 	struct bpf_verifier_state *cur = env->cur_state;
962 	struct bpf_verifier_stack_elem *elem;
963 	int err;
964 
965 	elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
966 	if (!elem)
967 		goto err;
968 
969 	elem->insn_idx = insn_idx;
970 	elem->prev_insn_idx = prev_insn_idx;
971 	elem->next = env->head;
972 	elem->log_pos = env->log.len_used;
973 	env->head = elem;
974 	env->stack_size++;
975 	err = copy_verifier_state(&elem->st, cur);
976 	if (err)
977 		goto err;
978 	elem->st.speculative |= speculative;
979 	if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
980 		verbose(env, "The sequence of %d jumps is too complex.\n",
981 			env->stack_size);
982 		goto err;
983 	}
984 	if (elem->st.parent) {
985 		++elem->st.parent->branches;
986 		/* WARN_ON(branches > 2) technically makes sense here,
987 		 * but
988 		 * 1. speculative states will bump 'branches' for non-branch
989 		 * instructions
990 		 * 2. is_state_visited() heuristics may decide not to create
991 		 * a new state for a sequence of branches and all such current
992 		 * and cloned states will be pointing to a single parent state
993 		 * which might have large 'branches' count.
994 		 */
995 	}
996 	return &elem->st;
997 err:
998 	free_verifier_state(env->cur_state, true);
999 	env->cur_state = NULL;
1000 	/* pop all elements and return */
1001 	while (!pop_stack(env, NULL, NULL, false));
1002 	return NULL;
1003 }
1004 
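/* R0-R5 are clobbered by helper and bpf-to-bpf calls: R1-R5 become NOT_INIT
 * and R0 receives the call's return type, while R6-R9 are preserved across
 * calls (see the header comment above).
 */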
1005 #define CALLER_SAVED_REGS 6
1006 static const int caller_saved[CALLER_SAVED_REGS] = {
1007 	BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
1008 };
1009 
1010 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1011 				struct bpf_reg_state *reg);
1012 
1013 /* This helper doesn't clear reg->id */
1014 static void ___mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1015 {
1016 	reg->var_off = tnum_const(imm);
1017 	reg->smin_value = (s64)imm;
1018 	reg->smax_value = (s64)imm;
1019 	reg->umin_value = imm;
1020 	reg->umax_value = imm;
1021 
1022 	reg->s32_min_value = (s32)imm;
1023 	reg->s32_max_value = (s32)imm;
1024 	reg->u32_min_value = (u32)imm;
1025 	reg->u32_max_value = (u32)imm;
1026 }
1027 
1028 /* Mark the unknown part of a register (variable offset or scalar value) as
1029  * known to have the value @imm.
1030  */
1031 static void __mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1032 {
1033 	/* Clear id, off, and union(map_ptr, range) */
1034 	memset(((u8 *)reg) + sizeof(reg->type), 0,
1035 	       offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1036 	___mark_reg_known(reg, imm);
1037 }
1038 
1039 static void __mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1040 {
1041 	reg->var_off = tnum_const_subreg(reg->var_off, imm);
1042 	reg->s32_min_value = (s32)imm;
1043 	reg->s32_max_value = (s32)imm;
1044 	reg->u32_min_value = (u32)imm;
1045 	reg->u32_max_value = (u32)imm;
1046 }
1047 
1048 /* Mark the 'variable offset' part of a register as zero.  This should be
1049  * used only on registers holding a pointer type.
1050  */
1051 static void __mark_reg_known_zero(struct bpf_reg_state *reg)
1052 {
1053 	__mark_reg_known(reg, 0);
1054 }
1055 
1056 static void __mark_reg_const_zero(struct bpf_reg_state *reg)
1057 {
1058 	__mark_reg_known(reg, 0);
1059 	reg->type = SCALAR_VALUE;
1060 }
1061 
1062 static void mark_reg_known_zero(struct bpf_verifier_env *env,
1063 				struct bpf_reg_state *regs, u32 regno)
1064 {
1065 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1066 		verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1067 		/* Something bad happened, let's kill all regs */
1068 		for (regno = 0; regno < MAX_BPF_REG; regno++)
1069 			__mark_reg_not_init(env, regs + regno);
1070 		return;
1071 	}
1072 	__mark_reg_known_zero(regs + regno);
1073 }
1074 
1075 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1076 {
1077 	return type_is_pkt_pointer(reg->type);
1078 }
1079 
1080 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1081 {
1082 	return reg_is_pkt_pointer(reg) ||
1083 	       reg->type == PTR_TO_PACKET_END;
1084 }
1085 
1086 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1087 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg,
1088 				    enum bpf_reg_type which)
1089 {
1090 	/* The register can already have a range from prior markings.
1091 	 * This is fine as long as it hasn't been advanced from its
1092 	 * origin.
1093 	 */
1094 	return reg->type == which &&
1095 	       reg->id == 0 &&
1096 	       reg->off == 0 &&
1097 	       tnum_equals_const(reg->var_off, 0);
1098 }
1099 
1100 /* Reset the min/max bounds of a register */
1101 static void __mark_reg_unbounded(struct bpf_reg_state *reg)
1102 {
1103 	reg->smin_value = S64_MIN;
1104 	reg->smax_value = S64_MAX;
1105 	reg->umin_value = 0;
1106 	reg->umax_value = U64_MAX;
1107 
1108 	reg->s32_min_value = S32_MIN;
1109 	reg->s32_max_value = S32_MAX;
1110 	reg->u32_min_value = 0;
1111 	reg->u32_max_value = U32_MAX;
1112 }
1113 
1114 static void __mark_reg64_unbounded(struct bpf_reg_state *reg)
1115 {
1116 	reg->smin_value = S64_MIN;
1117 	reg->smax_value = S64_MAX;
1118 	reg->umin_value = 0;
1119 	reg->umax_value = U64_MAX;
1120 }
1121 
1122 static void __mark_reg32_unbounded(struct bpf_reg_state *reg)
1123 {
1124 	reg->s32_min_value = S32_MIN;
1125 	reg->s32_max_value = S32_MAX;
1126 	reg->u32_min_value = 0;
1127 	reg->u32_max_value = U32_MAX;
1128 }
1129 
1130 static void __update_reg32_bounds(struct bpf_reg_state *reg)
1131 {
1132 	struct tnum var32_off = tnum_subreg(reg->var_off);
1133 
1134 	/* min signed is max(sign bit) | min(other bits) */
1135 	reg->s32_min_value = max_t(s32, reg->s32_min_value,
1136 			var32_off.value | (var32_off.mask & S32_MIN));
1137 	/* max signed is min(sign bit) | max(other bits) */
1138 	reg->s32_max_value = min_t(s32, reg->s32_max_value,
1139 			var32_off.value | (var32_off.mask & S32_MAX));
1140 	reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1141 	reg->u32_max_value = min(reg->u32_max_value,
1142 				 (u32)(var32_off.value | var32_off.mask));
1143 }
1144 
1145 static void __update_reg64_bounds(struct bpf_reg_state *reg)
1146 {
1147 	/* min signed is max(sign bit) | min(other bits) */
1148 	reg->smin_value = max_t(s64, reg->smin_value,
1149 				reg->var_off.value | (reg->var_off.mask & S64_MIN));
1150 	/* max signed is min(sign bit) | max(other bits) */
1151 	reg->smax_value = min_t(s64, reg->smax_value,
1152 				reg->var_off.value | (reg->var_off.mask & S64_MAX));
1153 	reg->umin_value = max(reg->umin_value, reg->var_off.value);
1154 	reg->umax_value = min(reg->umax_value,
1155 			      reg->var_off.value | reg->var_off.mask);
1156 }
1157 
1158 static void __update_reg_bounds(struct bpf_reg_state *reg)
1159 {
1160 	__update_reg32_bounds(reg);
1161 	__update_reg64_bounds(reg);
1162 }
1163 
1164 /* Uses signed min/max values to inform unsigned, and vice-versa */
1165 static void __reg32_deduce_bounds(struct bpf_reg_state *reg)
1166 {
1167 	/* Learn sign from signed bounds.
1168 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1169 	 * are the same, so combine.  This works even in the negative case, e.g.
1170 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1171 	 */
1172 	if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1173 		reg->s32_min_value = reg->u32_min_value =
1174 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1175 		reg->s32_max_value = reg->u32_max_value =
1176 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1177 		return;
1178 	}
1179 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1180 	 * boundary, so we must be careful.
1181 	 */
1182 	if ((s32)reg->u32_max_value >= 0) {
1183 		/* Positive.  We can't learn anything from the smin, but smax
1184 		 * is positive, hence safe.
1185 		 */
1186 		reg->s32_min_value = reg->u32_min_value;
1187 		reg->s32_max_value = reg->u32_max_value =
1188 			min_t(u32, reg->s32_max_value, reg->u32_max_value);
1189 	} else if ((s32)reg->u32_min_value < 0) {
1190 		/* Negative.  We can't learn anything from the smax, but smin
1191 		 * is negative, hence safe.
1192 		 */
1193 		reg->s32_min_value = reg->u32_min_value =
1194 			max_t(u32, reg->s32_min_value, reg->u32_min_value);
1195 		reg->s32_max_value = reg->u32_max_value;
1196 	}
1197 }
1198 
1199 static void __reg64_deduce_bounds(struct bpf_reg_state *reg)
1200 {
1201 	/* Learn sign from signed bounds.
1202 	 * If we cannot cross the sign boundary, then signed and unsigned bounds
1203 	 * are the same, so combine.  This works even in the negative case, e.g.
1204 	 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1205 	 */
1206 	if (reg->smin_value >= 0 || reg->smax_value < 0) {
1207 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1208 							  reg->umin_value);
1209 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1210 							  reg->umax_value);
1211 		return;
1212 	}
1213 	/* Learn sign from unsigned bounds.  Signed bounds cross the sign
1214 	 * boundary, so we must be careful.
1215 	 */
1216 	if ((s64)reg->umax_value >= 0) {
1217 		/* Positive.  We can't learn anything from the smin, but smax
1218 		 * is positive, hence safe.
1219 		 */
1220 		reg->smin_value = reg->umin_value;
1221 		reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value,
1222 							  reg->umax_value);
1223 	} else if ((s64)reg->umin_value < 0) {
1224 		/* Negative.  We can't learn anything from the smax, but smin
1225 		 * is negative, hence safe.
1226 		 */
1227 		reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value,
1228 							  reg->umin_value);
1229 		reg->smax_value = reg->umax_value;
1230 	}
1231 }
1232 
1233 static void __reg_deduce_bounds(struct bpf_reg_state *reg)
1234 {
1235 	__reg32_deduce_bounds(reg);
1236 	__reg64_deduce_bounds(reg);
1237 }
1238 
1239 /* Attempts to improve var_off based on unsigned min/max information */
1240 static void __reg_bound_offset(struct bpf_reg_state *reg)
1241 {
1242 	struct tnum var64_off = tnum_intersect(reg->var_off,
1243 					       tnum_range(reg->umin_value,
1244 							  reg->umax_value));
1245 	struct tnum var32_off = tnum_intersect(tnum_subreg(reg->var_off),
1246 						tnum_range(reg->u32_min_value,
1247 							   reg->u32_max_value));
1248 
1249 	reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1250 }
1251 
1252 static void reg_bounds_sync(struct bpf_reg_state *reg)
1253 {
1254 	/* We might have learned new bounds from the var_off. */
1255 	__update_reg_bounds(reg);
1256 	/* We might have learned something about the sign bit. */
1257 	__reg_deduce_bounds(reg);
1258 	/* We might have learned some bits from the bounds. */
1259 	__reg_bound_offset(reg);
1260 	/* Intersecting with the old var_off might have improved our bounds
1261 	 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1262 	 * then new var_off is (0; 0x7f...fc) which improves our umax.
1263 	 */
1264 	__update_reg_bounds(reg);
1265 }
1266 
1267 static bool __reg32_bound_s64(s32 a)
1268 {
1269 	return a >= 0 && a <= S32_MAX;
1270 }
1271 
1272 static void __reg_assign_32_into_64(struct bpf_reg_state *reg)
1273 {
1274 	reg->umin_value = reg->u32_min_value;
1275 	reg->umax_value = reg->u32_max_value;
1276 
1277 	/* Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
1278 	 * be positive; otherwise set to worst-case bounds and refine later
1279 	 * from the tnum.
1280 	 */
1281 	if (__reg32_bound_s64(reg->s32_min_value) &&
1282 	    __reg32_bound_s64(reg->s32_max_value)) {
1283 		reg->smin_value = reg->s32_min_value;
1284 		reg->smax_value = reg->s32_max_value;
1285 	} else {
1286 		reg->smin_value = 0;
1287 		reg->smax_value = U32_MAX;
1288 	}
1289 }
1290 
1291 static void __reg_combine_32_into_64(struct bpf_reg_state *reg)
1292 {
1293 	/* Special case when the 64-bit register has its upper 32 bits zeroed.
1294 	 * Typically happens after a zext or <<32, >>32 sequence, allowing us
1295 	 * to use the 32-bit bounds directly.
1296 	 */
1297 	if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1298 		__reg_assign_32_into_64(reg);
1299 	} else {
1300 		/* Otherwise the best we can do is push lower 32bit known and
1301 		 * unknown bits into register (var_off set from jmp logic)
1302 		 * then learn as much as possible from the 64-bit tnum
1303 		 * known and unknown bits. The previous smin/smax bounds are
1304 		 * invalid here because of jmp32 compare so mark them unknown
1305 		 * so they do not impact tnum bounds calculation.
1306 		 */
1307 		__mark_reg64_unbounded(reg);
1308 	}
1309 	reg_bounds_sync(reg);
1310 }
1311 
1312 static bool __reg64_bound_s32(s64 a)
1313 {
1314 	return a >= S32_MIN && a <= S32_MAX;
1315 }
1316 
1317 static bool __reg64_bound_u32(u64 a)
1318 {
1319 	return a >= U32_MIN && a <= U32_MAX;
1320 }
1321 
1322 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1323 {
1324 	__mark_reg32_unbounded(reg);
1325 	if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
1326 		reg->s32_min_value = (s32)reg->smin_value;
1327 		reg->s32_max_value = (s32)reg->smax_value;
1328 	}
1329 	if (__reg64_bound_u32(reg->umin_value) && __reg64_bound_u32(reg->umax_value)) {
1330 		reg->u32_min_value = (u32)reg->umin_value;
1331 		reg->u32_max_value = (u32)reg->umax_value;
1332 	}
1333 	reg_bounds_sync(reg);
1334 }
1335 
1336 /* Mark a register as having a completely unknown (scalar) value. */
1337 static void __mark_reg_unknown(const struct bpf_verifier_env *env,
1338 			       struct bpf_reg_state *reg)
1339 {
1340 	/*
1341 	 * Clear type, id, off, and union(map_ptr, range) and
1342 	 * padding between 'type' and union
1343 	 */
1344 	memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1345 	reg->type = SCALAR_VALUE;
1346 	reg->var_off = tnum_unknown;
1347 	reg->frameno = 0;
1348 	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1349 	__mark_reg_unbounded(reg);
1350 }
1351 
1352 static void mark_reg_unknown(struct bpf_verifier_env *env,
1353 			     struct bpf_reg_state *regs, u32 regno)
1354 {
1355 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1356 		verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1357 		/* Something bad happened, let's kill all regs except FP */
1358 		for (regno = 0; regno < BPF_REG_FP; regno++)
1359 			__mark_reg_not_init(env, regs + regno);
1360 		return;
1361 	}
1362 	__mark_reg_unknown(env, regs + regno);
1363 }
1364 
1365 static void __mark_reg_not_init(const struct bpf_verifier_env *env,
1366 				struct bpf_reg_state *reg)
1367 {
1368 	__mark_reg_unknown(env, reg);
1369 	reg->type = NOT_INIT;
1370 }
1371 
1372 static void mark_reg_not_init(struct bpf_verifier_env *env,
1373 			      struct bpf_reg_state *regs, u32 regno)
1374 {
1375 	if (WARN_ON(regno >= MAX_BPF_REG)) {
1376 		verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1377 		/* Something bad happened, let's kill all regs except FP */
1378 		for (regno = 0; regno < BPF_REG_FP; regno++)
1379 			__mark_reg_not_init(env, regs + regno);
1380 		return;
1381 	}
1382 	__mark_reg_not_init(env, regs + regno);
1383 }
1384 
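/* Set the destination register of a BTF-typed load: scalar members become an
 * unknown SCALAR_VALUE, pointer members become PTR_TO_BTF_ID carrying the
 * btf_id of the pointed-to type.
 */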
1385 static void mark_btf_ld_reg(struct bpf_verifier_env *env,
1386 			    struct bpf_reg_state *regs, u32 regno,
1387 			    enum bpf_reg_type reg_type, u32 btf_id)
1388 {
1389 	if (reg_type == SCALAR_VALUE) {
1390 		mark_reg_unknown(env, regs, regno);
1391 		return;
1392 	}
1393 	mark_reg_known_zero(env, regs, regno);
1394 	regs[regno].type = PTR_TO_BTF_ID;
1395 	regs[regno].btf_id = btf_id;
1396 }
1397 
1398 #define DEF_NOT_SUBREG	(0)
1399 static void init_reg_state(struct bpf_verifier_env *env,
1400 			   struct bpf_func_state *state)
1401 {
1402 	struct bpf_reg_state *regs = state->regs;
1403 	int i;
1404 
1405 	for (i = 0; i < MAX_BPF_REG; i++) {
1406 		mark_reg_not_init(env, regs, i);
1407 		regs[i].live = REG_LIVE_NONE;
1408 		regs[i].parent = NULL;
1409 		regs[i].subreg_def = DEF_NOT_SUBREG;
1410 	}
1411 
1412 	/* frame pointer */
1413 	regs[BPF_REG_FP].type = PTR_TO_STACK;
1414 	mark_reg_known_zero(env, regs, BPF_REG_FP);
1415 	regs[BPF_REG_FP].frameno = state->frameno;
1416 }
1417 
1418 #define BPF_MAIN_FUNC (-1)
1419 static void init_func_state(struct bpf_verifier_env *env,
1420 			    struct bpf_func_state *state,
1421 			    int callsite, int frameno, int subprogno)
1422 {
1423 	state->callsite = callsite;
1424 	state->frameno = frameno;
1425 	state->subprogno = subprogno;
1426 	init_reg_state(env, state);
1427 }
1428 
1429 enum reg_arg_type {
1430 	SRC_OP,		/* register is used as source operand */
1431 	DST_OP,		/* register is used as destination operand */
1432 	DST_OP_NO_MARK	/* same as above, check only, don't mark */
1433 };
1434 
1435 static int cmp_subprogs(const void *a, const void *b)
1436 {
1437 	return ((struct bpf_subprog_info *)a)->start -
1438 	       ((struct bpf_subprog_info *)b)->start;
1439 }
1440 
1441 static int find_subprog(struct bpf_verifier_env *env, int off)
1442 {
1443 	struct bpf_subprog_info *p;
1444 
1445 	p = bsearch(&off, env->subprog_info, env->subprog_cnt,
1446 		    sizeof(env->subprog_info[0]), cmp_subprogs);
1447 	if (!p)
1448 		return -ENOENT;
1449 	return p - env->subprog_info;
1450 
1451 }
1452 
1453 static int add_subprog(struct bpf_verifier_env *env, int off)
1454 {
1455 	int insn_cnt = env->prog->len;
1456 	int ret;
1457 
1458 	if (off >= insn_cnt || off < 0) {
1459 		verbose(env, "call to invalid destination\n");
1460 		return -EINVAL;
1461 	}
1462 	ret = find_subprog(env, off);
1463 	if (ret >= 0)
1464 		return 0;
1465 	if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1466 		verbose(env, "too many subprograms\n");
1467 		return -E2BIG;
1468 	}
1469 	env->subprog_info[env->subprog_cnt++].start = off;
1470 	sort(env->subprog_info, env->subprog_cnt,
1471 	     sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1472 	return 0;
1473 }
1474 
1475 static int check_subprogs(struct bpf_verifier_env *env)
1476 {
1477 	int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1478 	struct bpf_subprog_info *subprog = env->subprog_info;
1479 	struct bpf_insn *insn = env->prog->insnsi;
1480 	int insn_cnt = env->prog->len;
1481 
1482 	/* Add entry function. */
1483 	ret = add_subprog(env, 0);
1484 	if (ret < 0)
1485 		return ret;
1486 
1487 	/* determine subprog starts. The end is one before the next starts */
1488 	for (i = 0; i < insn_cnt; i++) {
1489 		if (insn[i].code != (BPF_JMP | BPF_CALL))
1490 			continue;
1491 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
1492 			continue;
1493 		if (!env->bpf_capable) {
1494 			verbose(env,
1495 				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1496 			return -EPERM;
1497 		}
1498 		ret = add_subprog(env, i + insn[i].imm + 1);
1499 		if (ret < 0)
1500 			return ret;
1501 	}
1502 
1503 	/* Add a fake 'exit' subprog which could simplify subprog iteration
1504 	 * logic. 'subprog_cnt' should not be increased.
1505 	 */
1506 	subprog[env->subprog_cnt].start = insn_cnt;
1507 
1508 	if (env->log.level & BPF_LOG_LEVEL2)
1509 		for (i = 0; i < env->subprog_cnt; i++)
1510 			verbose(env, "func#%d @%d\n", i, subprog[i].start);
1511 
1512 	/* now check that all jumps are within the same subprog */
1513 	subprog_start = subprog[cur_subprog].start;
1514 	subprog_end = subprog[cur_subprog + 1].start;
1515 	for (i = 0; i < insn_cnt; i++) {
1516 		u8 code = insn[i].code;
1517 
1518 		if (code == (BPF_JMP | BPF_CALL) &&
1519 		    insn[i].imm == BPF_FUNC_tail_call &&
1520 		    insn[i].src_reg != BPF_PSEUDO_CALL)
1521 			subprog[cur_subprog].has_tail_call = true;
1522 		if (BPF_CLASS(code) == BPF_LD &&
1523 		    (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
1524 			subprog[cur_subprog].has_ld_abs = true;
1525 		if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
1526 			goto next;
1527 		if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
1528 			goto next;
1529 		off = i + insn[i].off + 1;
1530 		if (off < subprog_start || off >= subprog_end) {
1531 			verbose(env, "jump out of range from insn %d to %d\n", i, off);
1532 			return -EINVAL;
1533 		}
1534 next:
1535 		if (i == subprog_end - 1) {
1536 			/* to avoid fall-through from one subprog into another
1537 			 * the last insn of the subprog should be either exit
1538 			 * or unconditional jump back
1539 			 */
1540 			if (code != (BPF_JMP | BPF_EXIT) &&
1541 			    code != (BPF_JMP | BPF_JA)) {
1542 				verbose(env, "last insn is not an exit or jmp\n");
1543 				return -EINVAL;
1544 			}
1545 			subprog_start = subprog_end;
1546 			cur_subprog++;
1547 			if (cur_subprog < env->subprog_cnt)
1548 				subprog_end = subprog[cur_subprog + 1].start;
1549 		}
1550 	}
1551 	return 0;
1552 }
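
/* A rough example of the layout this function produces (insn indexes are
 * made up).  If insn 3 is a bpf-to-bpf call with imm = 2, the callee
 * starts at insn 3 + 2 + 1 = 6 and the subprog table becomes:
 *
 *   func#0 @0          insns 0..5
 *   func#1 @6          insns 6..insn_cnt-1
 *   fake 'exit' subprog @insn_cnt (upper bound only, not counted)
 *
 * Each subprog must end in BPF_EXIT or an unconditional BPF_JA, and any
 * jmp whose target falls outside its own [start, end) range is rejected.
 */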
1553 
1554 /* Parentage chain of this register (or stack slot) should take care of all
1555  * issues like callee-saved registers, stack slot allocation time, etc.
1556  */
1557 static int mark_reg_read(struct bpf_verifier_env *env,
1558 			 const struct bpf_reg_state *state,
1559 			 struct bpf_reg_state *parent, u8 flag)
1560 {
1561 	bool writes = parent == state->parent; /* Observe write marks */
1562 	int cnt = 0;
1563 
1564 	while (parent) {
1565 		/* if read wasn't screened by an earlier write ... */
1566 		if (writes && state->live & REG_LIVE_WRITTEN)
1567 			break;
1568 		if (parent->live & REG_LIVE_DONE) {
1569 			verbose(env, "verifier BUG type %s var_off %lld off %d\n",
1570 				reg_type_str[parent->type],
1571 				parent->var_off.value, parent->off);
1572 			return -EFAULT;
1573 		}
1574 		/* The first condition is more likely to be true than the
1575 		 * second, so check it first.
1576 		 */
1577 		if ((parent->live & REG_LIVE_READ) == flag ||
1578 		    parent->live & REG_LIVE_READ64)
1579 			/* The parentage chain never changes and
1580 			 * this parent was already marked as LIVE_READ.
1581 			 * There is no need to keep walking the chain again and
1582 			 * keep re-marking all parents as LIVE_READ.
1583 			 * This case happens when the same register is read
1584 			 * multiple times without writes into it in-between.
1585 			 * Also, if parent has the stronger REG_LIVE_READ64 set,
1586 			 * then no need to set the weak REG_LIVE_READ32.
1587 			 */
1588 			break;
1589 		/* ... then we depend on parent's value */
1590 		parent->live |= flag;
1591 		/* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1592 		if (flag == REG_LIVE_READ64)
1593 			parent->live &= ~REG_LIVE_READ32;
1594 		state = parent;
1595 		parent = state->parent;
1596 		writes = true;
1597 		cnt++;
1598 	}
1599 
1600 	if (env->longest_mark_read_walk < cnt)
1601 		env->longest_mark_read_walk = cnt;
1602 	return 0;
1603 }
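
/* A rough illustration of the walk above (states are made up): if r6 was
 * last written in some ancestor state A and is read in the current state,
 * the r6 slot in every parent state between here and A, including A
 * itself, gets the READ32/READ64 flag; the walk then stops at A's
 * REG_LIVE_WRITTEN mark, so states older than A stay untouched.
 */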
1604 
1605 /* This function is supposed to be used by the following 32-bit optimization
1606  * code only. It returns TRUE if the source or destination register operates
1607  * on 64-bit, otherwise return FALSE.
1608  * on 64-bit, otherwise FALSE.
1609 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
1610 		     u32 regno, struct bpf_reg_state *reg, enum reg_arg_type t)
1611 {
1612 	u8 code, class, op;
1613 
1614 	code = insn->code;
1615 	class = BPF_CLASS(code);
1616 	op = BPF_OP(code);
1617 	if (class == BPF_JMP) {
1618 		/* BPF_EXIT for "main" will reach here. Return TRUE
1619 		 * conservatively.
1620 		 */
1621 		if (op == BPF_EXIT)
1622 			return true;
1623 		if (op == BPF_CALL) {
1624 			/* BPF to BPF call will reach here because of marking
1625 			 * caller saved clobber with DST_OP_NO_MARK for which we
1626 			 * don't care the register def because they are anyway
1627 			 * don't care about the register def because they are anyway
1628 			 */
1629 			if (insn->src_reg == BPF_PSEUDO_CALL)
1630 				return false;
1631 			/* Helper call will reach here because of arg type
1632 			 * check, conservatively return TRUE.
1633 			 */
1634 			if (t == SRC_OP)
1635 				return true;
1636 
1637 			return false;
1638 		}
1639 	}
1640 
1641 	if (class == BPF_ALU64 || class == BPF_JMP ||
1642 	    /* BPF_END always use BPF_ALU class. */
1643 	    (class == BPF_ALU && op == BPF_END && insn->imm == 64))
1644 		return true;
1645 
1646 	if (class == BPF_ALU || class == BPF_JMP32)
1647 		return false;
1648 
1649 	if (class == BPF_LDX) {
1650 		if (t != SRC_OP)
1651 			return BPF_SIZE(code) == BPF_DW;
1652 		/* LDX source must be ptr. */
1653 		return true;
1654 	}
1655 
1656 	if (class == BPF_STX) {
1657 		if (reg->type != SCALAR_VALUE)
1658 			return true;
1659 		return BPF_SIZE(code) == BPF_DW;
1660 	}
1661 
1662 	if (class == BPF_LD) {
1663 		u8 mode = BPF_MODE(code);
1664 
1665 		/* LD_IMM64 */
1666 		if (mode == BPF_IMM)
1667 			return true;
1668 
1669 		/* Both LD_IND and LD_ABS return 32-bit data. */
1670 		if (t != SRC_OP)
1671 			return false;
1672 
1673 		/* Implicit ctx ptr. */
1674 		if (regno == BPF_REG_6)
1675 			return true;
1676 
1677 		/* Explicit source could be any width. */
1678 		return true;
1679 	}
1680 
1681 	if (class == BPF_ST)
1682 		/* The only source register for BPF_ST is a ptr. */
1683 		return true;
1684 
1685 	/* Conservatively return true at default. */
1686 	return true;
1687 }
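
/* A few illustrative classifications (made-up insns, not exhaustive):
 *   BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2) -> 64-bit for src and dst
 *   BPF_ALU32_IMM(BPF_MOV, BPF_REG_1, 5)         -> 32-bit def of r1 (subreg)
 *   BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, 0)  -> 32-bit def of r1,
 *                                                   64-bit use of r2 (ptr)
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0) -> 64-bit def of r1
 * The 32-bit defs above are the ones insn_has_def32()/mark_insn_zext()
 * below care about for the zero-extension optimization.
 */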
1688 
1689 /* Return TRUE if INSN doesn't have explicit value define. */
1690 static bool insn_no_def(struct bpf_insn *insn)
1691 {
1692 	u8 class = BPF_CLASS(insn->code);
1693 
1694 	return (class == BPF_JMP || class == BPF_JMP32 ||
1695 		class == BPF_STX || class == BPF_ST);
1696 }
1697 
1698 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1699 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1700 {
1701 	if (insn_no_def(insn))
1702 		return false;
1703 
1704 	return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1705 }
1706 
1707 static void mark_insn_zext(struct bpf_verifier_env *env,
1708 			   struct bpf_reg_state *reg)
1709 {
1710 	s32 def_idx = reg->subreg_def;
1711 
1712 	if (def_idx == DEF_NOT_SUBREG)
1713 		return;
1714 
1715 	env->insn_aux_data[def_idx - 1].zext_dst = true;
1716 	/* The dst will be zero extended, so won't be sub-register anymore. */
1717 	reg->subreg_def = DEF_NOT_SUBREG;
1718 }
1719 
1720 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno,
1721 			 enum reg_arg_type t)
1722 {
1723 	struct bpf_verifier_state *vstate = env->cur_state;
1724 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
1725 	struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1726 	struct bpf_reg_state *reg, *regs = state->regs;
1727 	bool rw64;
1728 
1729 	if (regno >= MAX_BPF_REG) {
1730 		verbose(env, "R%d is invalid\n", regno);
1731 		return -EINVAL;
1732 	}
1733 
1734 	reg = &regs[regno];
1735 	rw64 = is_reg64(env, insn, regno, reg, t);
1736 	if (t == SRC_OP) {
1737 		/* check whether register used as source operand can be read */
1738 		if (reg->type == NOT_INIT) {
1739 			verbose(env, "R%d !read_ok\n", regno);
1740 			return -EACCES;
1741 		}
1742 		/* We don't need to worry about FP liveness because it's read-only */
1743 		if (regno == BPF_REG_FP)
1744 			return 0;
1745 
1746 		if (rw64)
1747 			mark_insn_zext(env, reg);
1748 
1749 		return mark_reg_read(env, reg, reg->parent,
1750 				     rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1751 	} else {
1752 		/* check whether register used as dest operand can be written to */
1753 		if (regno == BPF_REG_FP) {
1754 			verbose(env, "frame pointer is read only\n");
1755 			return -EACCES;
1756 		}
1757 		reg->live |= REG_LIVE_WRITTEN;
1758 		reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1759 		if (t == DST_OP)
1760 			mark_reg_unknown(env, regs, regno);
1761 	}
1762 	return 0;
1763 }
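
/* Example of how the bookkeeping above is used (insn indexes made up):
 *   5: w1 = w2        32-bit def, so r1.subreg_def = 6 (insn_idx + 1)
 *   6: r3 = r1        64-bit read of r1: mark_insn_zext() flags insn 5
 *                     (subreg_def - 1) as needing an explicit zext and
 *                     resets r1.subreg_def to DEF_NOT_SUBREG.
 * If r1 were only ever read by 32-bit insns, insn 5 would never be
 * flagged and no zero extension would be inserted for it.
 */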
1764 
1765 /* for any branch, call, exit record the history of jmps in the given state */
1766 static int push_jmp_history(struct bpf_verifier_env *env,
1767 			    struct bpf_verifier_state *cur)
1768 {
1769 	u32 cnt = cur->jmp_history_cnt;
1770 	struct bpf_idx_pair *p;
1771 
1772 	cnt++;
1773 	p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1774 	if (!p)
1775 		return -ENOMEM;
1776 	p[cnt - 1].idx = env->insn_idx;
1777 	p[cnt - 1].prev_idx = env->prev_insn_idx;
1778 	cur->jmp_history = p;
1779 	cur->jmp_history_cnt = cnt;
1780 	return 0;
1781 }
1782 
1783 /* Backtrack one insn at a time. If idx is not at the top of recorded
1784  * history then previous instruction came from straight line execution.
1785  */
1786 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i,
1787 			     u32 *history)
1788 {
1789 	u32 cnt = *history;
1790 
1791 	if (cnt && st->jmp_history[cnt - 1].idx == i) {
1792 		i = st->jmp_history[cnt - 1].prev_idx;
1793 		(*history)--;
1794 	} else {
1795 		i--;
1796 	}
1797 	return i;
1798 }
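
/* Rough example of the walk order (indexes are illustrative).  If the
 * recorded history is a single entry {idx = 7, prev_idx = 3}, i.e. the
 * path jumped from insn 3 to insn 7, then backtracking from insn 9
 * visits 9, 8, 7 by plain decrement, at 7 follows the history entry back
 * to 3, and continues 3, 2, 1, ... until first_insn_idx is reached.
 */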
1799 
1800 /* For given verifier state backtrack_insn() is called from the last insn to
1801  * the first insn. Its purpose is to compute a bitmask of registers and
1802  * stack slots that needs precision in the parent verifier state.
1803  */
1804 static int backtrack_insn(struct bpf_verifier_env *env, int idx,
1805 			  u32 *reg_mask, u64 *stack_mask)
1806 {
1807 	const struct bpf_insn_cbs cbs = {
1808 		.cb_print	= verbose,
1809 		.private_data	= env,
1810 	};
1811 	struct bpf_insn *insn = env->prog->insnsi + idx;
1812 	u8 class = BPF_CLASS(insn->code);
1813 	u8 opcode = BPF_OP(insn->code);
1814 	u8 mode = BPF_MODE(insn->code);
1815 	u32 dreg = 1u << insn->dst_reg;
1816 	u32 sreg = 1u << insn->src_reg;
1817 	u32 spi;
1818 
1819 	if (insn->code == 0)
1820 		return 0;
1821 	if (env->log.level & BPF_LOG_LEVEL) {
1822 		verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1823 		verbose(env, "%d: ", idx);
1824 		print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1825 	}
1826 
1827 	if (class == BPF_ALU || class == BPF_ALU64) {
1828 		if (!(*reg_mask & dreg))
1829 			return 0;
1830 		if (opcode == BPF_MOV) {
1831 			if (BPF_SRC(insn->code) == BPF_X) {
1832 				/* dreg = sreg
1833 				 * dreg needs precision after this insn
1834 				 * sreg needs precision before this insn
1835 				 */
1836 				*reg_mask &= ~dreg;
1837 				*reg_mask |= sreg;
1838 			} else {
1839 				/* dreg = K
1840 				 * dreg needs precision after this insn.
1841 				 * Corresponding register is already marked
1842 				 * as precise=true in this verifier state.
1843 				 * No further markings in parent are necessary
1844 				 */
1845 				*reg_mask &= ~dreg;
1846 			}
1847 		} else {
1848 			if (BPF_SRC(insn->code) == BPF_X) {
1849 				/* dreg += sreg
1850 				 * both dreg and sreg need precision
1851 				 * before this insn
1852 				 */
1853 				*reg_mask |= sreg;
1854 			} /* else dreg += K
1855 			   * dreg still needs precision before this insn
1856 			   */
1857 		}
1858 	} else if (class == BPF_LDX) {
1859 		if (!(*reg_mask & dreg))
1860 			return 0;
1861 		*reg_mask &= ~dreg;
1862 
1863 		/* scalars can only be spilled into stack w/o losing precision.
1864 		 * Load from any other memory can be zero extended.
1865 		 * The desire to keep that precision is already indicated
1866 		 * by 'precise' mark in corresponding register of this state.
1867 		 * No further tracking necessary.
1868 		 */
1869 		if (insn->src_reg != BPF_REG_FP)
1870 			return 0;
1871 		if (BPF_SIZE(insn->code) != BPF_DW)
1872 			return 0;
1873 
1874 		/* dreg = *(u64 *)[fp - off] was a fill from the stack.
1875 		 * that [fp - off] slot contains scalar that needs to be
1876 		 * tracked with precision
1877 		 */
1878 		spi = (-insn->off - 1) / BPF_REG_SIZE;
1879 		if (spi >= 64) {
1880 			verbose(env, "BUG spi %d\n", spi);
1881 			WARN_ONCE(1, "verifier backtracking bug");
1882 			return -EFAULT;
1883 		}
1884 		*stack_mask |= 1ull << spi;
1885 	} else if (class == BPF_STX || class == BPF_ST) {
1886 		if (*reg_mask & dreg)
1887 			/* stx & st shouldn't be using _scalar_ dst_reg
1888 			 * to access memory. It means backtracking
1889 			 * encountered a case of pointer subtraction.
1890 			 */
1891 			return -ENOTSUPP;
1892 		/* scalars can only be spilled into stack */
1893 		if (insn->dst_reg != BPF_REG_FP)
1894 			return 0;
1895 		if (BPF_SIZE(insn->code) != BPF_DW)
1896 			return 0;
1897 		spi = (-insn->off - 1) / BPF_REG_SIZE;
1898 		if (spi >= 64) {
1899 			verbose(env, "BUG spi %d\n", spi);
1900 			WARN_ONCE(1, "verifier backtracking bug");
1901 			return -EFAULT;
1902 		}
1903 		if (!(*stack_mask & (1ull << spi)))
1904 			return 0;
1905 		*stack_mask &= ~(1ull << spi);
1906 		if (class == BPF_STX)
1907 			*reg_mask |= sreg;
1908 	} else if (class == BPF_JMP || class == BPF_JMP32) {
1909 		if (opcode == BPF_CALL) {
1910 			if (insn->src_reg == BPF_PSEUDO_CALL)
1911 				return -ENOTSUPP;
1912 			/* regular helper call sets R0 */
1913 			*reg_mask &= ~1;
1914 			if (*reg_mask & 0x3f) {
1915 				/* if backtracking was looking for registers R1-R5
1916 				 * they should have been found already.
1917 				 */
1918 				verbose(env, "BUG regs %x\n", *reg_mask);
1919 				WARN_ONCE(1, "verifier backtracking bug");
1920 				return -EFAULT;
1921 			}
1922 		} else if (opcode == BPF_EXIT) {
1923 			return -ENOTSUPP;
1924 		}
1925 	} else if (class == BPF_LD) {
1926 		if (!(*reg_mask & dreg))
1927 			return 0;
1928 		*reg_mask &= ~dreg;
1929 		/* It's ld_imm64 or ld_abs or ld_ind.
1930 		 * For ld_imm64 no further tracking of precision
1931 		 * into parent is necessary
1932 		 */
1933 		if (mode == BPF_IND || mode == BPF_ABS)
1934 			/* to be analyzed */
1935 			return -ENOTSUPP;
1936 	}
1937 	return 0;
1938 }
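
/* A rough trace of how reg_mask evolves (insn indexes and registers are
 * made up).  Suppose precision of r4 is requested right after:
 *   3: r4 = 1
 *   4: r4 += r6
 *   5: if r4 > 5 goto pc+N
 * Backtracking starts at insn 5 (skipped as the requesting insn) with
 * reg_mask = {r4}.  Insn 4 is "dreg += sreg", so r6 is added:
 * reg_mask = {r4, r6}.  Insn 3 is "dreg = K", which satisfies r4:
 * reg_mask = {r6}, and the walk keeps going (possibly into parent
 * states) until the insn that made r6 a constant is found.
 */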
1939 
1940 /* the scalar precision tracking algorithm:
1941  * . at the start all registers have precise=false.
1942  * . scalar ranges are tracked as normal through alu and jmp insns.
1943  * . once precise value of the scalar register is used in:
1944  *   .  ptr + scalar alu
1945  *   . if (scalar cond K|scalar)
1946  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1947  *   backtrack through the verifier states and mark all registers and
1948  *   stack slots with spilled constants that these scalar registers
1949  *   should be precise.
1950  * . during state pruning two registers (or spilled stack slots)
1951  *   are equivalent if both are not precise.
1952  *
1953  * Note the verifier cannot simply walk register parentage chain,
1954  * since many different registers and stack slots could have been
1955  * used to compute single precise scalar.
1956  *
1957  * The approach of starting with precise=true for all registers and then
1958  * backtrack to mark a register as not precise when the verifier detects
1959  * that program doesn't care about specific value (e.g., when helper
1960  * takes register as ARG_ANYTHING parameter) is not safe.
1961  *
1962  * It's ok to walk single parentage chain of the verifier states.
1963  * It's possible that this backtracking will go all the way till 1st insn.
1964  * All other branches will be explored for needing precision later.
1965  *
1966  * The backtracking needs to deal with cases like:
1967  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1968  * r9 -= r8
1969  * r5 = r9
1970  * if r5 > 0x79f goto pc+7
1971  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1972  * r5 += 1
1973  * ...
1974  * call bpf_perf_event_output#25
1975  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1976  *
1977  * and this case:
1978  * r6 = 1
1979  * call foo // uses callee's r6 inside to compute r0
1980  * r0 += r6
1981  * if r0 == 0 goto
1982  *
1983  * to track above reg_mask/stack_mask needs to be independent for each frame.
1984  *
1985  * Also if parent's curframe > frame where backtracking started,
1986  * the verifier need to mark registers in both frames, otherwise callees
1987  * may incorrectly prune callers. This is similar to
1988  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1989  *
1990  * For now backtracking falls back into conservative marking.
1991  */
1992 static void mark_all_scalars_precise(struct bpf_verifier_env *env,
1993 				     struct bpf_verifier_state *st)
1994 {
1995 	struct bpf_func_state *func;
1996 	struct bpf_reg_state *reg;
1997 	int i, j;
1998 
1999 	/* big hammer: mark all scalars precise in this path.
2000 	 * pop_stack may still get !precise scalars.
2001 	 */
2002 	for (; st; st = st->parent)
2003 		for (i = 0; i <= st->curframe; i++) {
2004 			func = st->frame[i];
2005 			for (j = 0; j < BPF_REG_FP; j++) {
2006 				reg = &func->regs[j];
2007 				if (reg->type != SCALAR_VALUE)
2008 					continue;
2009 				reg->precise = true;
2010 			}
2011 			for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2012 				if (func->stack[j].slot_type[0] != STACK_SPILL)
2013 					continue;
2014 				reg = &func->stack[j].spilled_ptr;
2015 				if (reg->type != SCALAR_VALUE)
2016 					continue;
2017 				reg->precise = true;
2018 			}
2019 		}
2020 }
2021 
2022 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
2023 				  int spi)
2024 {
2025 	struct bpf_verifier_state *st = env->cur_state;
2026 	int first_idx = st->first_insn_idx;
2027 	int last_idx = env->insn_idx;
2028 	struct bpf_func_state *func;
2029 	struct bpf_reg_state *reg;
2030 	u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2031 	u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2032 	bool skip_first = true;
2033 	bool new_marks = false;
2034 	int i, err;
2035 
2036 	if (!env->bpf_capable)
2037 		return 0;
2038 
2039 	func = st->frame[st->curframe];
2040 	if (regno >= 0) {
2041 		reg = &func->regs[regno];
2042 		if (reg->type != SCALAR_VALUE) {
2043 			WARN_ONCE(1, "backtracing misuse");
2044 			return -EFAULT;
2045 		}
2046 		if (!reg->precise)
2047 			new_marks = true;
2048 		else
2049 			reg_mask = 0;
2050 		reg->precise = true;
2051 	}
2052 
2053 	while (spi >= 0) {
2054 		if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2055 			stack_mask = 0;
2056 			break;
2057 		}
2058 		reg = &func->stack[spi].spilled_ptr;
2059 		if (reg->type != SCALAR_VALUE) {
2060 			stack_mask = 0;
2061 			break;
2062 		}
2063 		if (!reg->precise)
2064 			new_marks = true;
2065 		else
2066 			stack_mask = 0;
2067 		reg->precise = true;
2068 		break;
2069 	}
2070 
2071 	if (!new_marks)
2072 		return 0;
2073 	if (!reg_mask && !stack_mask)
2074 		return 0;
2075 	for (;;) {
2076 		DECLARE_BITMAP(mask, 64);
2077 		u32 history = st->jmp_history_cnt;
2078 
2079 		if (env->log.level & BPF_LOG_LEVEL)
2080 			verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2081 		for (i = last_idx;;) {
2082 			if (skip_first) {
2083 				err = 0;
2084 				skip_first = false;
2085 			} else {
2086 				err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2087 			}
2088 			if (err == -ENOTSUPP) {
2089 				mark_all_scalars_precise(env, st);
2090 				return 0;
2091 			} else if (err) {
2092 				return err;
2093 			}
2094 			if (!reg_mask && !stack_mask)
2095 				/* Found assignment(s) into tracked register in this state.
2096 				 * Since this state is already marked, just return.
2097 				 * Nothing to be tracked further in the parent state.
2098 				 */
2099 				return 0;
2100 			if (i == first_idx)
2101 				break;
2102 			i = get_prev_insn_idx(st, i, &history);
2103 			if (i >= env->prog->len) {
2104 				/* This can happen if backtracking reached insn 0
2105 				 * and there are still reg_mask or stack_mask
2106 				 * to backtrack.
2107 				 * It means the backtracking missed the spot where
2108 				 * particular register was initialized with a constant.
2109 				 */
2110 				verbose(env, "BUG backtracking idx %d\n", i);
2111 				WARN_ONCE(1, "verifier backtracking bug");
2112 				return -EFAULT;
2113 			}
2114 		}
2115 		st = st->parent;
2116 		if (!st)
2117 			break;
2118 
2119 		new_marks = false;
2120 		func = st->frame[st->curframe];
2121 		bitmap_from_u64(mask, reg_mask);
2122 		for_each_set_bit(i, mask, 32) {
2123 			reg = &func->regs[i];
2124 			if (reg->type != SCALAR_VALUE) {
2125 				reg_mask &= ~(1u << i);
2126 				continue;
2127 			}
2128 			if (!reg->precise)
2129 				new_marks = true;
2130 			reg->precise = true;
2131 		}
2132 
2133 		bitmap_from_u64(mask, stack_mask);
2134 		for_each_set_bit(i, mask, 64) {
2135 			if (i >= func->allocated_stack / BPF_REG_SIZE) {
2136 				/* the sequence of instructions:
2137 				 * 2: (bf) r3 = r10
2138 				 * 3: (7b) *(u64 *)(r3 -8) = r0
2139 				 * 4: (79) r4 = *(u64 *)(r10 -8)
2140 				 * doesn't contain jmps. It's backtracked
2141 				 * as a single block.
2142 				 * During backtracking insn 3 is not recognized as
2143 				 * stack access, so at the end of backtracking
2144 				 * stack slot fp-8 is still marked in stack_mask.
2145 				 * However the parent state may not have accessed
2146 				 * fp-8 and it's "unallocated" stack space.
2147 				 * In such case fallback to conservative.
2148 				 */
2149 				mark_all_scalars_precise(env, st);
2150 				return 0;
2151 			}
2152 
2153 			if (func->stack[i].slot_type[0] != STACK_SPILL) {
2154 				stack_mask &= ~(1ull << i);
2155 				continue;
2156 			}
2157 			reg = &func->stack[i].spilled_ptr;
2158 			if (reg->type != SCALAR_VALUE) {
2159 				stack_mask &= ~(1ull << i);
2160 				continue;
2161 			}
2162 			if (!reg->precise)
2163 				new_marks = true;
2164 			reg->precise = true;
2165 		}
2166 		if (env->log.level & BPF_LOG_LEVEL) {
2167 			print_verifier_state(env, func);
2168 			verbose(env, "parent %s regs=%x stack=%llx marks\n",
2169 				new_marks ? "didn't have" : "already had",
2170 				reg_mask, stack_mask);
2171 		}
2172 
2173 		if (!reg_mask && !stack_mask)
2174 			break;
2175 		if (!new_marks)
2176 			break;
2177 
2178 		last_idx = st->last_insn_idx;
2179 		first_idx = st->first_insn_idx;
2180 	}
2181 	return 0;
2182 }
2183 
2184 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2185 {
2186 	return __mark_chain_precision(env, regno, -1);
2187 }
2188 
2189 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2190 {
2191 	return __mark_chain_precision(env, -1, spi);
2192 }
2193 
2194 static bool is_spillable_regtype(enum bpf_reg_type type)
2195 {
2196 	switch (type) {
2197 	case PTR_TO_MAP_VALUE:
2198 	case PTR_TO_MAP_VALUE_OR_NULL:
2199 	case PTR_TO_STACK:
2200 	case PTR_TO_CTX:
2201 	case PTR_TO_PACKET:
2202 	case PTR_TO_PACKET_META:
2203 	case PTR_TO_PACKET_END:
2204 	case PTR_TO_FLOW_KEYS:
2205 	case CONST_PTR_TO_MAP:
2206 	case PTR_TO_SOCKET:
2207 	case PTR_TO_SOCKET_OR_NULL:
2208 	case PTR_TO_SOCK_COMMON:
2209 	case PTR_TO_SOCK_COMMON_OR_NULL:
2210 	case PTR_TO_TCP_SOCK:
2211 	case PTR_TO_TCP_SOCK_OR_NULL:
2212 	case PTR_TO_XDP_SOCK:
2213 	case PTR_TO_BTF_ID:
2214 	case PTR_TO_BTF_ID_OR_NULL:
2215 	case PTR_TO_RDONLY_BUF:
2216 	case PTR_TO_RDONLY_BUF_OR_NULL:
2217 	case PTR_TO_RDWR_BUF:
2218 	case PTR_TO_RDWR_BUF_OR_NULL:
2219 	case PTR_TO_PERCPU_BTF_ID:
2220 	case PTR_TO_MEM:
2221 	case PTR_TO_MEM_OR_NULL:
2222 		return true;
2223 	default:
2224 		return false;
2225 	}
2226 }
2227 
2228 /* Does this register contain a constant zero? */
2229 static bool register_is_null(struct bpf_reg_state *reg)
2230 {
2231 	return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2232 }
2233 
2234 static bool register_is_const(struct bpf_reg_state *reg)
2235 {
2236 	return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2237 }
2238 
2239 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2240 {
2241 	return tnum_is_unknown(reg->var_off) &&
2242 	       reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2243 	       reg->umin_value == 0 && reg->umax_value == U64_MAX &&
2244 	       reg->s32_min_value == S32_MIN && reg->s32_max_value == S32_MAX &&
2245 	       reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2246 }
2247 
2248 static bool register_is_bounded(struct bpf_reg_state *reg)
2249 {
2250 	return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2251 }
2252 
2253 static bool __is_pointer_value(bool allow_ptr_leaks,
2254 			       const struct bpf_reg_state *reg)
2255 {
2256 	if (allow_ptr_leaks)
2257 		return false;
2258 
2259 	return reg->type != SCALAR_VALUE;
2260 }
2261 
2262 static void save_register_state(struct bpf_func_state *state,
2263 				int spi, struct bpf_reg_state *reg)
2264 {
2265 	int i;
2266 
2267 	state->stack[spi].spilled_ptr = *reg;
2268 	state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2269 
2270 	for (i = 0; i < BPF_REG_SIZE; i++)
2271 		state->stack[spi].slot_type[i] = STACK_SPILL;
2272 }
2273 
2274 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2275  * stack boundary and alignment are checked in check_mem_access()
2276  */
2277 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2278 				       /* stack frame we're writing to */
2279 				       struct bpf_func_state *state,
2280 				       int off, int size, int value_regno,
2281 				       int insn_idx)
2282 {
2283 	struct bpf_func_state *cur; /* state of the current function */
2284 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2285 	u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2286 	struct bpf_reg_state *reg = NULL;
2287 
2288 	err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE),
2289 				 state->acquired_refs, true);
2290 	if (err)
2291 		return err;
2292 	/* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2293 	 * so it's aligned access and [off, off + size) are within stack limits
2294 	 */
2295 	if (!env->allow_ptr_leaks &&
2296 	    state->stack[spi].slot_type[0] == STACK_SPILL &&
2297 	    size != BPF_REG_SIZE) {
2298 		verbose(env, "attempt to corrupt spilled pointer on stack\n");
2299 		return -EACCES;
2300 	}
2301 
2302 	cur = env->cur_state->frame[env->cur_state->curframe];
2303 	if (value_regno >= 0)
2304 		reg = &cur->regs[value_regno];
2305 	if (!env->bypass_spec_v4) {
2306 		bool sanitize = reg && is_spillable_regtype(reg->type);
2307 
2308 		for (i = 0; i < size; i++) {
2309 			if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2310 				sanitize = true;
2311 				break;
2312 			}
2313 		}
2314 
2315 		if (sanitize)
2316 			env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2317 	}
2318 
2319 	if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) &&
2320 	    !register_is_null(reg) && env->bpf_capable) {
2321 		if (dst_reg != BPF_REG_FP) {
2322 			/* The backtracking logic can only recognize explicit
2323 			 * stack slot address like [fp - 8]. Other spill of
2324 			 * scalar via a different register has to be conservative.
2325 			 * Backtrack from here and mark all registers as precise
2326 			 * that contributed into 'reg' being a constant.
2327 			 */
2328 			err = mark_chain_precision(env, value_regno);
2329 			if (err)
2330 				return err;
2331 		}
2332 		save_register_state(state, spi, reg);
2333 	} else if (reg && is_spillable_regtype(reg->type)) {
2334 		/* register containing pointer is being spilled into stack */
2335 		if (size != BPF_REG_SIZE) {
2336 			verbose_linfo(env, insn_idx, "; ");
2337 			verbose(env, "invalid size of register spill\n");
2338 			return -EACCES;
2339 		}
2340 		if (state != cur && reg->type == PTR_TO_STACK) {
2341 			verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2342 			return -EINVAL;
2343 		}
2344 		save_register_state(state, spi, reg);
2345 	} else {
2346 		u8 type = STACK_MISC;
2347 
2348 		/* regular write of data into stack destroys any spilled ptr */
2349 		state->stack[spi].spilled_ptr.type = NOT_INIT;
2350 		/* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2351 		if (state->stack[spi].slot_type[0] == STACK_SPILL)
2352 			for (i = 0; i < BPF_REG_SIZE; i++)
2353 				state->stack[spi].slot_type[i] = STACK_MISC;
2354 
2355 		/* only mark the slot as written if all 8 bytes were written
2356 		 * otherwise read propagation may incorrectly stop too soon
2357 		 * when stack slots are partially written.
2358 		 * This heuristic means that read propagation will be
2359 		 * conservative, since it will add reg_live_read marks
2360 		 * to stack slots all the way to first state when programs
2361 		 * to stack slots all the way to the first state when a program
2362 		 */
2363 		if (size == BPF_REG_SIZE)
2364 			state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2365 
2366 		/* when we zero initialize stack slots mark them as such */
2367 		if (reg && register_is_null(reg)) {
2368 			/* backtracking doesn't work for STACK_ZERO yet. */
2369 			err = mark_chain_precision(env, value_regno);
2370 			if (err)
2371 				return err;
2372 			type = STACK_ZERO;
2373 		}
2374 
2375 		/* Mark slots affected by this stack write. */
2376 		for (i = 0; i < size; i++)
2377 			state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] =
2378 				type;
2379 	}
2380 	return 0;
2381 }
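
/* Examples of the three paths above (registers and offsets illustrative):
 *   *(u64 *)(r10 - 8) = r6, r6 a bounded non-zero scalar
 *       -> spilled via save_register_state(); if the store went through a
 *          copy of the frame pointer instead of r10 itself, the scalar is
 *          also chain-marked precise, since later backtracking only
 *          recognizes the [fp - 8] form.
 *   *(u64 *)(r10 - 16) = r1, r1 = PTR_TO_CTX
 *       -> pointer spill, only allowed at full BPF_REG_SIZE.
 *   *(u32 *)(r10 - 24) = r2
 *       -> partial write: the touched slot bytes become STACK_MISC (or
 *          STACK_ZERO when a known zero is written) and any previous
 *          spill in that slot is destroyed.
 */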
2382 
2383 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2384  * known to contain a variable offset.
2385  * This function checks whether the write is permitted and conservatively
2386  * tracks the effects of the write, considering that each stack slot in the
2387  * dynamic range is potentially written to.
2388  *
2389  * 'off' includes 'regno->off'.
2390  * 'value_regno' can be -1, meaning that an unknown value is being written to
2391  * the stack.
2392  *
2393  * Spilled pointers in range are not marked as written because we don't know
2394  * what's going to be actually written. This means that read propagation for
2395  * future reads cannot be terminated by this write.
2396  *
2397  * For privileged programs, uninitialized stack slots are considered
2398  * initialized by this write (even though we don't know exactly what offsets
2399  * are going to be written to). The idea is that we don't want the verifier to
2400  * reject future reads that access slots written to through variable offsets.
2401  */
2402 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2403 				     /* func where register points to */
2404 				     struct bpf_func_state *state,
2405 				     int ptr_regno, int off, int size,
2406 				     int value_regno, int insn_idx)
2407 {
2408 	struct bpf_func_state *cur; /* state of the current function */
2409 	int min_off, max_off;
2410 	int i, err;
2411 	struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2412 	bool writing_zero = false;
2413 	/* set if the fact that we're writing a zero is used to let any
2414 	 * stack slots remain STACK_ZERO
2415 	 */
2416 	bool zero_used = false;
2417 
2418 	cur = env->cur_state->frame[env->cur_state->curframe];
2419 	ptr_reg = &cur->regs[ptr_regno];
2420 	min_off = ptr_reg->smin_value + off;
2421 	max_off = ptr_reg->smax_value + off + size;
2422 	if (value_regno >= 0)
2423 		value_reg = &cur->regs[value_regno];
2424 	if (value_reg && register_is_null(value_reg))
2425 		writing_zero = true;
2426 
2427 	err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE),
2428 				 state->acquired_refs, true);
2429 	if (err)
2430 		return err;
2431 
2432 
2433 	/* Variable offset writes destroy any spilled pointers in range. */
2434 	for (i = min_off; i < max_off; i++) {
2435 		u8 new_type, *stype;
2436 		int slot, spi;
2437 
2438 		slot = -i - 1;
2439 		spi = slot / BPF_REG_SIZE;
2440 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2441 
2442 		if (!env->allow_ptr_leaks
2443 				&& *stype != NOT_INIT
2444 				&& *stype != SCALAR_VALUE) {
2445 			/* Reject the write if there are spilled pointers in
2446 			 * range. If we didn't reject here, the ptr status
2447 			 * would be erased below (even though not all slots are
2448 			 * actually overwritten), possibly opening the door to
2449 			 * leaks.
2450 			 */
2451 			verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d",
2452 				insn_idx, i);
2453 			return -EINVAL;
2454 		}
2455 
2456 		/* Erase all spilled pointers. */
2457 		state->stack[spi].spilled_ptr.type = NOT_INIT;
2458 
2459 		/* Update the slot type. */
2460 		new_type = STACK_MISC;
2461 		if (writing_zero && *stype == STACK_ZERO) {
2462 			new_type = STACK_ZERO;
2463 			zero_used = true;
2464 		}
2465 		/* If the slot is STACK_INVALID, we check whether it's OK to
2466 		 * pretend that it will be initialized by this write. The slot
2467 		 * might not actually be written to, and so if we mark it as
2468 		 * initialized future reads might leak uninitialized memory.
2469 		 * For privileged programs, we will accept such reads to slots
2470 		 * that may or may not be written because, if we rejected
2471 		 * them, the error would be too confusing.
2472 		 */
2473 		if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2474 			verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d",
2475 					insn_idx, i);
2476 			return -EINVAL;
2477 		}
2478 		*stype = new_type;
2479 	}
2480 	if (zero_used) {
2481 		/* backtracking doesn't work for STACK_ZERO yet. */
2482 		err = mark_chain_precision(env, value_regno);
2483 		if (err)
2484 			return err;
2485 	}
2486 	return 0;
2487 }
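
/* Illustrative example: r2 points into the stack with a variable part
 * known to be in [-24, -16], and the program does *(u32 *)(r2 + 0) = r3.
 * Then min_off = -24, max_off = -12, so every stack byte in [fp-24, fp-12)
 * is treated as potentially written: a spilled pointer in that range makes
 * the write fail when pointer leaks are not allowed (and is erased
 * otherwise), and the slot bytes are downgraded to STACK_MISC unless a
 * known zero keeps them STACK_ZERO.
 */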
2488 
2489 /* When register 'dst_regno' is assigned some values from stack[min_off,
2490  * max_off), we set the register's type according to the types of the
2491  * respective stack slots. If all the stack values are known to be zeros, then
2492  * so is the destination reg. Otherwise, the register is considered to be
2493  * SCALAR. This function does not deal with register filling; the caller must
2494  * ensure that all spilled registers in the stack range have been marked as
2495  * read.
2496  */
2497 static void mark_reg_stack_read(struct bpf_verifier_env *env,
2498 				/* func where src register points to */
2499 				struct bpf_func_state *ptr_state,
2500 				int min_off, int max_off, int dst_regno)
2501 {
2502 	struct bpf_verifier_state *vstate = env->cur_state;
2503 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2504 	int i, slot, spi;
2505 	u8 *stype;
2506 	int zeros = 0;
2507 
2508 	for (i = min_off; i < max_off; i++) {
2509 		slot = -i - 1;
2510 		spi = slot / BPF_REG_SIZE;
2511 		stype = ptr_state->stack[spi].slot_type;
2512 		if (stype[slot % BPF_REG_SIZE] != STACK_ZERO)
2513 			break;
2514 		zeros++;
2515 	}
2516 	if (zeros == max_off - min_off) {
2517 		/* any access_size read into register is zero extended,
2518 		 * so the whole register == const_zero
2519 		 */
2520 		__mark_reg_const_zero(&state->regs[dst_regno]);
2521 		/* backtracking doesn't support STACK_ZERO yet,
2522 		 * so mark it precise here, so that later
2523 		 * backtracking can stop here.
2524 		 * Backtracking may not need this if this register
2525 		 * doesn't participate in pointer adjustment.
2526 		 * Forward propagation of precise flag is not
2527 		 * necessary either. This mark is only to stop
2528 		 * backtracking. Any register that contributed
2529 		 * to const 0 was marked precise before spill.
2530 		 */
2531 		state->regs[dst_regno].precise = true;
2532 	} else {
2533 		/* have read misc data from the stack */
2534 		mark_reg_unknown(env, state->regs, dst_regno);
2535 	}
2536 	state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2537 }
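
/* Example (offsets illustrative): a 4-byte read covering fp-8..fp-5 where
 * all four slot bytes are STACK_ZERO makes dst_regno a known zero (and
 * marks it precise so backtracking can stop there); if any byte is
 * STACK_MISC the destination becomes an unknown SCALAR_VALUE instead.
 */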
2538 
2539 /* Read the stack at 'off' and put the results into the register indicated by
2540  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2541  * spilled reg.
2542  *
2543  * 'dst_regno' can be -1, meaning that the read value is not going to a
2544  * register.
2545  *
2546  * The access is assumed to be within the current stack bounds.
2547  */
2548 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2549 				      /* func where src register points to */
2550 				      struct bpf_func_state *reg_state,
2551 				      int off, int size, int dst_regno)
2552 {
2553 	struct bpf_verifier_state *vstate = env->cur_state;
2554 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2555 	int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2556 	struct bpf_reg_state *reg;
2557 	u8 *stype;
2558 
2559 	stype = reg_state->stack[spi].slot_type;
2560 	reg = &reg_state->stack[spi].spilled_ptr;
2561 
2562 	if (stype[0] == STACK_SPILL) {
2563 		if (size != BPF_REG_SIZE) {
2564 			if (reg->type != SCALAR_VALUE) {
2565 				verbose_linfo(env, env->insn_idx, "; ");
2566 				verbose(env, "invalid size of register fill\n");
2567 				return -EACCES;
2568 			}
2569 			if (dst_regno >= 0) {
2570 				mark_reg_unknown(env, state->regs, dst_regno);
2571 				state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2572 			}
2573 			mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2574 			return 0;
2575 		}
2576 		for (i = 1; i < BPF_REG_SIZE; i++) {
2577 			if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2578 				verbose(env, "corrupted spill memory\n");
2579 				return -EACCES;
2580 			}
2581 		}
2582 
2583 		if (dst_regno >= 0) {
2584 			/* restore register state from stack */
2585 			state->regs[dst_regno] = *reg;
2586 			/* mark reg as written since spilled pointer state likely
2587 			 * has its liveness marks cleared by is_state_visited()
2588 			 * which resets stack/reg liveness for state transitions
2589 			 */
2590 			state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2591 		} else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2592 			/* If dst_regno==-1, the caller is asking us whether
2593 			 * it is acceptable to use this value as a SCALAR_VALUE
2594 			 * (e.g. for XADD).
2595 			 * We must not allow unprivileged callers to do that
2596 			 * with spilled pointers.
2597 			 */
2598 			verbose(env, "leaking pointer from stack off %d\n",
2599 				off);
2600 			return -EACCES;
2601 		}
2602 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2603 	} else {
2604 		u8 type;
2605 
2606 		for (i = 0; i < size; i++) {
2607 			type = stype[(slot - i) % BPF_REG_SIZE];
2608 			if (type == STACK_MISC)
2609 				continue;
2610 			if (type == STACK_ZERO)
2611 				continue;
2612 			verbose(env, "invalid read from stack off %d+%d size %d\n",
2613 				off, i, size);
2614 			return -EACCES;
2615 		}
2616 		mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2617 		if (dst_regno >= 0)
2618 			mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2619 	}
2620 	return 0;
2621 }
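
/* Fill examples (offsets are illustrative):
 *   r1 = *(u64 *)(r10 - 8), fp-8 holds a spilled register
 *       -> full-width fill: r1 gets the spilled register state back.
 *   r1 = *(u32 *)(r10 - 8), same slot
 *       -> only allowed if the spilled value is a scalar; r1 then becomes
 *          an unknown scalar rather than a copy of the spill.
 *   r1 = *(u64 *)(r10 - 16), slot bytes are STACK_MISC/STACK_ZERO
 *       -> no spill involved, the result is derived by
 *          mark_reg_stack_read() above.
 */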
2622 
2623 enum stack_access_src {
2624 	ACCESS_DIRECT = 1,  /* the access is performed by an instruction */
2625 	ACCESS_HELPER = 2,  /* the access is performed by a helper */
2626 };
2627 
2628 static int check_stack_range_initialized(struct bpf_verifier_env *env,
2629 					 int regno, int off, int access_size,
2630 					 bool zero_size_allowed,
2631 					 enum stack_access_src type,
2632 					 struct bpf_call_arg_meta *meta);
2633 
2634 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2635 {
2636 	return cur_regs(env) + regno;
2637 }
2638 
2639 /* Read the stack at 'ptr_regno + off' and put the result into the register
2640  * 'dst_regno'.
2641  * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
2642  * but not its variable offset.
2643  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2644  *
2645  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2646  * filling registers (i.e. reads of spilled register cannot be detected when
2647  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2648  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2649  * offset; for a fixed offset check_stack_read_fixed_off should be used
2650  * instead.
2651  */
2652 static int check_stack_read_var_off(struct bpf_verifier_env *env,
2653 				    int ptr_regno, int off, int size, int dst_regno)
2654 {
2655 	/* The state of the source register. */
2656 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2657 	struct bpf_func_state *ptr_state = func(env, reg);
2658 	int err;
2659 	int min_off, max_off;
2660 
2661 	/* Note that we pass a NULL meta, so raw access will not be permitted.
2662 	 */
2663 	err = check_stack_range_initialized(env, ptr_regno, off, size,
2664 					    false, ACCESS_DIRECT, NULL);
2665 	if (err)
2666 		return err;
2667 
2668 	min_off = reg->smin_value + off;
2669 	max_off = reg->smax_value + off;
2670 	mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2671 	return 0;
2672 }
2673 
2674 /* check_stack_read dispatches to check_stack_read_fixed_off or
2675  * check_stack_read_var_off.
2676  *
2677  * The caller must ensure that the offset falls within the allocated stack
2678  * bounds.
2679  *
2680  * 'dst_regno' is a register which will receive the value from the stack. It
2681  * can be -1, meaning that the read value is not going to a register.
2682  */
2683 static int check_stack_read(struct bpf_verifier_env *env,
2684 			    int ptr_regno, int off, int size,
2685 			    int dst_regno)
2686 {
2687 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2688 	struct bpf_func_state *state = func(env, reg);
2689 	int err;
2690 	/* Some accesses are only permitted with a static offset. */
2691 	bool var_off = !tnum_is_const(reg->var_off);
2692 
2693 	/* The offset is required to be static when reads don't go to a
2694 	 * register, in order to not leak pointers (see
2695 	 * check_stack_read_fixed_off).
2696 	 */
2697 	if (dst_regno < 0 && var_off) {
2698 		char tn_buf[48];
2699 
2700 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2701 		verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2702 			tn_buf, off, size);
2703 		return -EACCES;
2704 	}
2705 	/* Variable offset is prohibited for unprivileged mode for simplicity
2706 	 * since it requires corresponding support in Spectre masking for stack
2707 	 * ALU. See also retrieve_ptr_limit().
2708 	 */
2709 	if (!env->bypass_spec_v1 && var_off) {
2710 		char tn_buf[48];
2711 
2712 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2713 		verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n",
2714 				ptr_regno, tn_buf);
2715 		return -EACCES;
2716 	}
2717 
2718 	if (!var_off) {
2719 		off += reg->var_off.value;
2720 		err = check_stack_read_fixed_off(env, state, off, size,
2721 						 dst_regno);
2722 	} else {
2723 		/* Variable offset stack reads need more conservative handling
2724 		 * than fixed offset ones. Note that dst_regno >= 0 on this
2725 		 * branch.
2726 		 */
2727 		err = check_stack_read_var_off(env, ptr_regno, off, size,
2728 					       dst_regno);
2729 	}
2730 	return err;
2731 }
2732 
2733 
2734 /* check_stack_write dispatches to check_stack_write_fixed_off or
2735  * check_stack_write_var_off.
2736  *
2737  * 'ptr_regno' is the register used as a pointer into the stack.
2738  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2739  * 'value_regno' is the register whose value we're writing to the stack. It can
2740  * be -1, meaning that we're not writing from a register.
2741  *
2742  * The caller must ensure that the offset falls within the maximum stack size.
2743  */
2744 static int check_stack_write(struct bpf_verifier_env *env,
2745 			     int ptr_regno, int off, int size,
2746 			     int value_regno, int insn_idx)
2747 {
2748 	struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2749 	struct bpf_func_state *state = func(env, reg);
2750 	int err;
2751 
2752 	if (tnum_is_const(reg->var_off)) {
2753 		off += reg->var_off.value;
2754 		err = check_stack_write_fixed_off(env, state, off, size,
2755 						  value_regno, insn_idx);
2756 	} else {
2757 		/* Variable offset stack reads need more conservative handling
2758 		 * than fixed offset ones.
2759 		 */
2760 		err = check_stack_write_var_off(env, state,
2761 						ptr_regno, off, size,
2762 						value_regno, insn_idx);
2763 	}
2764 	return err;
2765 }
2766 
2767 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
2768 				 int off, int size, enum bpf_access_type type)
2769 {
2770 	struct bpf_reg_state *regs = cur_regs(env);
2771 	struct bpf_map *map = regs[regno].map_ptr;
2772 	u32 cap = bpf_map_flags_to_cap(map);
2773 
2774 	if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2775 		verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n",
2776 			map->value_size, off, size);
2777 		return -EACCES;
2778 	}
2779 
2780 	if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2781 		verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n",
2782 			map->value_size, off, size);
2783 		return -EACCES;
2784 	}
2785 
2786 	return 0;
2787 }
2788 
2789 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2790 static int __check_mem_access(struct bpf_verifier_env *env, int regno,
2791 			      int off, int size, u32 mem_size,
2792 			      bool zero_size_allowed)
2793 {
2794 	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2795 	struct bpf_reg_state *reg;
2796 
2797 	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
2798 		return 0;
2799 
2800 	reg = &cur_regs(env)[regno];
2801 	switch (reg->type) {
2802 	case PTR_TO_MAP_VALUE:
2803 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
2804 			mem_size, off, size);
2805 		break;
2806 	case PTR_TO_PACKET:
2807 	case PTR_TO_PACKET_META:
2808 	case PTR_TO_PACKET_END:
2809 		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
2810 			off, size, regno, reg->id, off, mem_size);
2811 		break;
2812 	case PTR_TO_MEM:
2813 	default:
2814 		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
2815 			mem_size, off, size);
2816 	}
2817 
2818 	return -EACCES;
2819 }
2820 
2821 /* check read/write into a memory region with possible variable offset */
2822 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
2823 				   int off, int size, u32 mem_size,
2824 				   bool zero_size_allowed)
2825 {
2826 	struct bpf_verifier_state *vstate = env->cur_state;
2827 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2828 	struct bpf_reg_state *reg = &state->regs[regno];
2829 	int err;
2830 
2831 	/* We may have adjusted the register pointing to memory region, so we
2832 	 * need to try adding each of min_value and max_value to off
2833 	 * to make sure our theoretical access will be safe.
2834 	 */
2835 	if (env->log.level & BPF_LOG_LEVEL)
2836 		print_verifier_state(env, state);
2837 
2838 	/* The minimum value is only important with signed
2839 	 * comparisons where we can't assume the floor of a
2840 	 * value is 0.  If we are using signed variables for our
2841 	 * index'es we need to make sure that whatever we use
2842 	 * will have a set floor within our range.
2843 	 */
2844 	if (reg->smin_value < 0 &&
2845 	    (reg->smin_value == S64_MIN ||
2846 	     (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2847 	      reg->smin_value + off < 0)) {
2848 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2849 			regno);
2850 		return -EACCES;
2851 	}
2852 	err = __check_mem_access(env, regno, reg->smin_value + off, size,
2853 				 mem_size, zero_size_allowed);
2854 	if (err) {
2855 		verbose(env, "R%d min value is outside of the allowed memory range\n",
2856 			regno);
2857 		return err;
2858 	}
2859 
2860 	/* If we haven't set a max value then we need to bail since we can't be
2861 	 * sure we won't do bad things.
2862 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
2863 	 */
2864 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2865 		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
2866 			regno);
2867 		return -EACCES;
2868 	}
2869 	err = __check_mem_access(env, regno, reg->umax_value + off, size,
2870 				 mem_size, zero_size_allowed);
2871 	if (err) {
2872 		verbose(env, "R%d max value is outside of the allowed memory range\n",
2873 			regno);
2874 		return err;
2875 	}
2876 
2877 	return 0;
2878 }
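
/* Illustrative numbers for the two checks above: with mem_size = 64 and
 * off = 0, a register whose smin_value is -4 is rejected up front
 * ("min value is negative"); one with [smin_value, umax_value] = [0, 60]
 * passes the min-side check but fails the max-side check for an 8-byte
 * access, since 60 + 8 > 64.
 */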
2879 
2880 /* check read/write into a map element with possible variable offset */
2881 static int check_map_access(struct bpf_verifier_env *env, u32 regno,
2882 			    int off, int size, bool zero_size_allowed)
2883 {
2884 	struct bpf_verifier_state *vstate = env->cur_state;
2885 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
2886 	struct bpf_reg_state *reg = &state->regs[regno];
2887 	struct bpf_map *map = reg->map_ptr;
2888 	int err;
2889 
2890 	err = check_mem_region_access(env, regno, off, size, map->value_size,
2891 				      zero_size_allowed);
2892 	if (err)
2893 		return err;
2894 
2895 	if (map_value_has_spin_lock(map)) {
2896 		u32 lock = map->spin_lock_off;
2897 
2898 		/* if any part of struct bpf_spin_lock can be touched by
2899 		 * load/store reject this program.
2900 		 * To check that [x1, x2) overlaps with [y1, y2)
2901 		 * it is sufficient to check x1 < y2 && y1 < x2.
2902 		 */
2903 		if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) &&
2904 		     lock < reg->umax_value + off + size) {
2905 			verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2906 			return -EACCES;
2907 		}
2908 	}
2909 	return err;
2910 }
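
/* Overlap example for the bpf_spin_lock check (offsets illustrative):
 * with spin_lock_off = 16 the lock occupies [16, 20).  A 4-byte access
 * at value offset 12 ([12, 16)) does not overlap and is fine, while an
 * 8-byte access at offset 12 ([12, 20)) satisfies "12 < 20 && 16 < 20"
 * and is rejected.
 */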
2911 
2912 #define MAX_PACKET_OFF 0xffff
2913 
2914 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2915 {
2916 	return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2917 }
2918 
2919 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
2920 				       const struct bpf_call_arg_meta *meta,
2921 				       enum bpf_access_type t)
2922 {
2923 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2924 
2925 	switch (prog_type) {
2926 	/* Program types only with direct read access go here! */
2927 	case BPF_PROG_TYPE_LWT_IN:
2928 	case BPF_PROG_TYPE_LWT_OUT:
2929 	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2930 	case BPF_PROG_TYPE_SK_REUSEPORT:
2931 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
2932 	case BPF_PROG_TYPE_CGROUP_SKB:
2933 		if (t == BPF_WRITE)
2934 			return false;
2935 		fallthrough;
2936 
2937 	/* Program types with direct read + write access go here! */
2938 	case BPF_PROG_TYPE_SCHED_CLS:
2939 	case BPF_PROG_TYPE_SCHED_ACT:
2940 	case BPF_PROG_TYPE_XDP:
2941 	case BPF_PROG_TYPE_LWT_XMIT:
2942 	case BPF_PROG_TYPE_SK_SKB:
2943 	case BPF_PROG_TYPE_SK_MSG:
2944 		if (meta)
2945 			return meta->pkt_access;
2946 
2947 		env->seen_direct_write = true;
2948 		return true;
2949 
2950 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2951 		if (t == BPF_WRITE)
2952 			env->seen_direct_write = true;
2953 
2954 		return true;
2955 
2956 	default:
2957 		return false;
2958 	}
2959 }
2960 
2961 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
2962 			       int size, bool zero_size_allowed)
2963 {
2964 	struct bpf_reg_state *regs = cur_regs(env);
2965 	struct bpf_reg_state *reg = &regs[regno];
2966 	int err;
2967 
2968 	/* We may have added a variable offset to the packet pointer; but any
2969 	 * reg->range we have comes after that.  We are only checking the fixed
2970 	 * offset.
2971 	 */
2972 
2973 	/* We don't allow negative numbers, because we aren't tracking enough
2974 	 * detail to prove they're safe.
2975 	 */
2976 	if (reg->smin_value < 0) {
2977 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
2978 			regno);
2979 		return -EACCES;
2980 	}
2981 
2982 	err = reg->range < 0 ? -EINVAL :
2983 	      __check_mem_access(env, regno, off, size, reg->range,
2984 				 zero_size_allowed);
2985 	if (err) {
2986 		verbose(env, "R%d offset is outside of the packet\n", regno);
2987 		return err;
2988 	}
2989 
2990 	/* __check_mem_access has made sure "off + size - 1" is within u16.
2991 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2992  * otherwise find_good_pkt_pointers would have refused to set range info
2993  * and __check_mem_access would have rejected this pkt access.
2994 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2995 	 */
2996 	env->prog->aux->max_pkt_offset =
2997 		max_t(u32, env->prog->aux->max_pkt_offset,
2998 		      off + reg->umax_value + size - 1);
2999 
3000 	return err;
3001 }
3002 
3003 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
3004 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
3005 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
3006 			    u32 *btf_id)
3007 {
3008 	struct bpf_insn_access_aux info = {
3009 		.reg_type = *reg_type,
3010 		.log = &env->log,
3011 	};
3012 
3013 	if (env->ops->is_valid_access &&
3014 	    env->ops->is_valid_access(off, size, t, env->prog, &info)) {
3015 		/* A non zero info.ctx_field_size indicates that this field is a
3016 		 * candidate for later verifier transformation to load the whole
3017 		 * field and then apply a mask when accessed with a narrower
3018 		 * access than actual ctx access size. A zero info.ctx_field_size
3019 		 * will only allow for whole field access and rejects any other
3020 		 * type of narrower access.
3021 		 */
3022 		*reg_type = info.reg_type;
3023 
3024 		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
3025 			*btf_id = info.btf_id;
3026 		else
3027 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3028 		/* remember the offset of last byte accessed in ctx */
3029 		if (env->prog->aux->max_ctx_offset < off + size)
3030 			env->prog->aux->max_ctx_offset = off + size;
3031 		return 0;
3032 	}
3033 
3034 	verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3035 	return -EACCES;
3036 }
3037 
3038 static int check_flow_keys_access(struct bpf_verifier_env *env, int off,
3039 				  int size)
3040 {
3041 	if (size < 0 || off < 0 ||
3042 	    (u64)off + size > sizeof(struct bpf_flow_keys)) {
3043 		verbose(env, "invalid access to flow keys off=%d size=%d\n",
3044 			off, size);
3045 		return -EACCES;
3046 	}
3047 	return 0;
3048 }
3049 
3050 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx,
3051 			     u32 regno, int off, int size,
3052 			     enum bpf_access_type t)
3053 {
3054 	struct bpf_reg_state *regs = cur_regs(env);
3055 	struct bpf_reg_state *reg = &regs[regno];
3056 	struct bpf_insn_access_aux info = {};
3057 	bool valid;
3058 
3059 	if (reg->smin_value < 0) {
3060 		verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n",
3061 			regno);
3062 		return -EACCES;
3063 	}
3064 
3065 	switch (reg->type) {
3066 	case PTR_TO_SOCK_COMMON:
3067 		valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3068 		break;
3069 	case PTR_TO_SOCKET:
3070 		valid = bpf_sock_is_valid_access(off, size, t, &info);
3071 		break;
3072 	case PTR_TO_TCP_SOCK:
3073 		valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3074 		break;
3075 	case PTR_TO_XDP_SOCK:
3076 		valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3077 		break;
3078 	default:
3079 		valid = false;
3080 	}
3081 
3082 
3083 	if (valid) {
3084 		env->insn_aux_data[insn_idx].ctx_field_size =
3085 			info.ctx_field_size;
3086 		return 0;
3087 	}
3088 
3089 	verbose(env, "R%d invalid %s access off=%d size=%d\n",
3090 		regno, reg_type_str[reg->type], off, size);
3091 
3092 	return -EACCES;
3093 }
3094 
3095 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3096 {
3097 	return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3098 }
3099 
3100 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3101 {
3102 	const struct bpf_reg_state *reg = reg_state(env, regno);
3103 
3104 	return reg->type == PTR_TO_CTX;
3105 }
3106 
3107 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3108 {
3109 	const struct bpf_reg_state *reg = reg_state(env, regno);
3110 
3111 	return type_is_sk_pointer(reg->type);
3112 }
3113 
3114 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3115 {
3116 	const struct bpf_reg_state *reg = reg_state(env, regno);
3117 
3118 	return type_is_pkt_pointer(reg->type);
3119 }
3120 
3121 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3122 {
3123 	const struct bpf_reg_state *reg = reg_state(env, regno);
3124 
3125 	/* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3126 	return reg->type == PTR_TO_FLOW_KEYS;
3127 }
3128 
3129 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
3130 				   const struct bpf_reg_state *reg,
3131 				   int off, int size, bool strict)
3132 {
3133 	struct tnum reg_off;
3134 	int ip_align;
3135 
3136 	/* Byte size accesses are always allowed. */
3137 	if (!strict || size == 1)
3138 		return 0;
3139 
3140 	/* For platforms that do not have a Kconfig enabling
3141 	 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3142 	 * NET_IP_ALIGN is universally set to '2'.  And on platforms
3143 	 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3144 	 * to this code only in strict mode where we want to emulate
3145 	 * the NET_IP_ALIGN==2 checking.  Therefore use an
3146 	 * unconditional IP align value of '2'.
3147 	 */
3148 	ip_align = 2;
3149 
3150 	reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3151 	if (!tnum_is_aligned(reg_off, size)) {
3152 		char tn_buf[48];
3153 
3154 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3155 		verbose(env,
3156 			"misaligned packet access off %d+%s+%d+%d size %d\n",
3157 			ip_align, tn_buf, reg->off, off, size);
3158 		return -EACCES;
3159 	}
3160 
3161 	return 0;
3162 }
3163 
3164 static int check_generic_ptr_alignment(struct bpf_verifier_env *env,
3165 				       const struct bpf_reg_state *reg,
3166 				       const char *pointer_desc,
3167 				       int off, int size, bool strict)
3168 {
3169 	struct tnum reg_off;
3170 
3171 	/* Byte size accesses are always allowed. */
3172 	if (!strict || size == 1)
3173 		return 0;
3174 
3175 	reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3176 	if (!tnum_is_aligned(reg_off, size)) {
3177 		char tn_buf[48];
3178 
3179 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3180 		verbose(env, "misaligned %saccess off %s+%d+%d size %d\n",
3181 			pointer_desc, tn_buf, reg->off, off, size);
3182 		return -EACCES;
3183 	}
3184 
3185 	return 0;
3186 }
3187 
3188 static int check_ptr_alignment(struct bpf_verifier_env *env,
3189 			       const struct bpf_reg_state *reg, int off,
3190 			       int size, bool strict_alignment_once)
3191 {
3192 	bool strict = env->strict_alignment || strict_alignment_once;
3193 	const char *pointer_desc = "";
3194 
3195 	switch (reg->type) {
3196 	case PTR_TO_PACKET:
3197 	case PTR_TO_PACKET_META:
3198 		/* Special case, because of NET_IP_ALIGN. Given metadata sits
3199 		 * right in front, treat it the very same way.
3200 		 */
3201 		return check_pkt_ptr_alignment(env, reg, off, size, strict);
3202 	case PTR_TO_FLOW_KEYS:
3203 		pointer_desc = "flow keys ";
3204 		break;
3205 	case PTR_TO_MAP_VALUE:
3206 		pointer_desc = "value ";
3207 		break;
3208 	case PTR_TO_CTX:
3209 		pointer_desc = "context ";
3210 		break;
3211 	case PTR_TO_STACK:
3212 		pointer_desc = "stack ";
3213 		/* The stack spill tracking logic in check_stack_write_fixed_off()
3214 		 * and check_stack_read_fixed_off() relies on stack accesses being
3215 		 * aligned.
3216 		 */
3217 		strict = true;
3218 		break;
3219 	case PTR_TO_SOCKET:
3220 		pointer_desc = "sock ";
3221 		break;
3222 	case PTR_TO_SOCK_COMMON:
3223 		pointer_desc = "sock_common ";
3224 		break;
3225 	case PTR_TO_TCP_SOCK:
3226 		pointer_desc = "tcp_sock ";
3227 		break;
3228 	case PTR_TO_XDP_SOCK:
3229 		pointer_desc = "xdp_sock ";
3230 		break;
3231 	default:
3232 		break;
3233 	}
3234 	return check_generic_ptr_alignment(env, reg, pointer_desc, off, size,
3235 					   strict);
3236 }
3237 
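/* Remember the deepest stack slot (the most negative 'off') touched so far by
 * this subprogram; check_max_stack_depth() later sums these per-subprog
 * maxima across the whole call chain.
 */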
3238 static int update_stack_depth(struct bpf_verifier_env *env,
3239 			      const struct bpf_func_state *func,
3240 			      int off)
3241 {
3242 	u16 stack = env->subprog_info[func->subprogno].stack_depth;
3243 
3244 	if (stack >= -off)
3245 		return 0;
3246 
3247 	/* update known max for given subprogram */
3248 	env->subprog_info[func->subprogno].stack_depth = -off;
3249 	return 0;
3250 }
3251 
3252 /* starting from main bpf function walk all instructions of the function
3253  * and recursively walk all callees that given function can call.
3254  * Ignore jump and exit insns.
3255  * Since recursion is prevented by check_cfg() this algorithm
3256  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3257  */
3258 static int check_max_stack_depth(struct bpf_verifier_env *env)
3259 {
3260 	int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3261 	struct bpf_subprog_info *subprog = env->subprog_info;
3262 	struct bpf_insn *insn = env->prog->insnsi;
3263 	bool tail_call_reachable = false;
3264 	int ret_insn[MAX_CALL_FRAMES];
3265 	int ret_prog[MAX_CALL_FRAMES];
3266 	int j;
3267 
3268 process_func:
3269 	/* protect against potential stack overflow that might happen when
3270 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3271 	 * depth for such case down to 256 so that the worst case scenario
3272 	 * would result in 8k stack size (32 which is tailcall limit * 256 =
3273 	 * 8k).
3274 	 *
3275 	 * To get the idea what might happen, see an example:
3276 	 * func1 -> sub rsp, 128
3277 	 *  subfunc1 -> sub rsp, 256
3278 	 *  tailcall1 -> add rsp, 256
3279 	 *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3280 	 *   subfunc2 -> sub rsp, 64
3281 	 *   subfunc22 -> sub rsp, 128
3282 	 *   tailcall2 -> add rsp, 128
3283 	 *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3284 	 *
3285 	 * tailcall will unwind the current stack frame but it will not get rid
3286 	 * of the caller's stack, as shown in the example above.
3287 	 */
3288 	if (idx && subprog[idx].has_tail_call && depth >= 256) {
3289 		verbose(env,
3290 			"tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3291 			depth);
3292 		return -EACCES;
3293 	}
3294 	/* round up to 32 bytes, since this is the granularity
3295 	 * of interpreter stack size
3296 	 */
3297 	depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3298 	if (depth > MAX_BPF_STACK) {
3299 		verbose(env, "combined stack size of %d calls is %d. Too large\n",
3300 			frame + 1, depth);
3301 		return -EACCES;
3302 	}
3303 continue_func:
3304 	subprog_end = subprog[idx + 1].start;
3305 	for (; i < subprog_end; i++) {
3306 		if (insn[i].code != (BPF_JMP | BPF_CALL))
3307 			continue;
3308 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
3309 			continue;
3310 		/* remember insn and function to return to */
3311 		ret_insn[frame] = i + 1;
3312 		ret_prog[frame] = idx;
3313 
3314 		/* find the callee */
3315 		i = i + insn[i].imm + 1;
3316 		idx = find_subprog(env, i);
3317 		if (idx < 0) {
3318 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3319 				  i);
3320 			return -EFAULT;
3321 		}
3322 
3323 		if (subprog[idx].has_tail_call)
3324 			tail_call_reachable = true;
3325 
3326 		frame++;
3327 		if (frame >= MAX_CALL_FRAMES) {
3328 			verbose(env, "the call stack of %d frames is too deep !\n",
3329 				frame);
3330 			return -E2BIG;
3331 		}
3332 		goto process_func;
3333 	}
3334 	/* if tail call got detected across bpf2bpf calls then mark each of the
3335 	 * currently present subprog frames as tail call reachable subprogs;
3336 	 * this info will be utilized by JIT so that we will be preserving the
3337 	 * tail call counter throughout bpf2bpf calls combined with tailcalls
3338 	 */
3339 	if (tail_call_reachable)
3340 		for (j = 0; j < frame; j++)
3341 			subprog[ret_prog[j]].tail_call_reachable = true;
3342 	if (subprog[0].tail_call_reachable)
3343 		env->prog->aux->tail_call_reachable = true;
3344 
3345 	/* end of for() loop means the last insn of the 'subprog'
3346 	 * was reached. Doesn't matter whether it was JA or EXIT
3347 	 */
3348 	if (frame == 0)
3349 		return 0;
3350 	depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), 32);
3351 	frame--;
3352 	i = ret_insn[frame];
3353 	idx = ret_prog[frame];
3354 	goto continue_func;
3355 }
3356 
3357 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3358 static int get_callee_stack_depth(struct bpf_verifier_env *env,
3359 				  const struct bpf_insn *insn, int idx)
3360 {
3361 	int start = idx + insn->imm + 1, subprog;
3362 
3363 	subprog = find_subprog(env, start);
3364 	if (subprog < 0) {
3365 		WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
3366 			  start);
3367 		return -EFAULT;
3368 	}
3369 	return env->subprog_info[subprog].stack_depth;
3370 }
3371 #endif
3372 
3373 int check_ctx_reg(struct bpf_verifier_env *env,
3374 		  const struct bpf_reg_state *reg, int regno)
3375 {
3376 	/* Access to ctx or passing it to a helper is only allowed in
3377 	 * its original, unmodified form.
3378 	 */
3379 
3380 	if (reg->off) {
3381 		verbose(env, "dereference of modified ctx ptr R%d off=%d disallowed\n",
3382 			regno, reg->off);
3383 		return -EACCES;
3384 	}
3385 
3386 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3387 		char tn_buf[48];
3388 
3389 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3390 		verbose(env, "variable ctx access var_off=%s disallowed\n", tn_buf);
3391 		return -EACCES;
3392 	}
3393 
3394 	return 0;
3395 }
3396 
3397 static int __check_buffer_access(struct bpf_verifier_env *env,
3398 				 const char *buf_info,
3399 				 const struct bpf_reg_state *reg,
3400 				 int regno, int off, int size)
3401 {
3402 	if (off < 0) {
3403 		verbose(env,
3404 			"R%d invalid %s buffer access: off=%d, size=%d\n",
3405 			regno, buf_info, off, size);
3406 		return -EACCES;
3407 	}
3408 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3409 		char tn_buf[48];
3410 
3411 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3412 		verbose(env,
3413 			"R%d invalid variable buffer offset: off=%d, var_off=%s\n",
3414 			regno, off, tn_buf);
3415 		return -EACCES;
3416 	}
3417 
3418 	return 0;
3419 }
3420 
3421 static int check_tp_buffer_access(struct bpf_verifier_env *env,
3422 				  const struct bpf_reg_state *reg,
3423 				  int regno, int off, int size)
3424 {
3425 	int err;
3426 
3427 	err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3428 	if (err)
3429 		return err;
3430 
3431 	if (off + size > env->prog->aux->max_tp_access)
3432 		env->prog->aux->max_tp_access = off + size;
3433 
3434 	return 0;
3435 }
3436 
3437 static int check_buffer_access(struct bpf_verifier_env *env,
3438 			       const struct bpf_reg_state *reg,
3439 			       int regno, int off, int size,
3440 			       bool zero_size_allowed,
3441 			       const char *buf_info,
3442 			       u32 *max_access)
3443 {
3444 	int err;
3445 
3446 	err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3447 	if (err)
3448 		return err;
3449 
3450 	if (off + size > *max_access)
3451 		*max_access = off + size;
3452 
3453 	return 0;
3454 }
3455 
3456 /* BPF architecture zero extends alu32 ops into 64-bit registers */
3457 static void zext_32_to_64(struct bpf_reg_state *reg)
3458 {
3459 	reg->var_off = tnum_subreg(reg->var_off);
3460 	__reg_assign_32_into_64(reg);
3461 }
3462 
3463 /* truncate register to smaller size (in bytes)
3464  * must be called with size < BPF_REG_SIZE
3465  */
3466 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3467 {
3468 	u64 mask;
3469 
3470 	/* clear high bits in bit representation */
3471 	reg->var_off = tnum_cast(reg->var_off, size);
3472 
3473 	/* fix arithmetic bounds */
3474 	mask = ((u64)1 << (size * 8)) - 1;
3475 	if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3476 		reg->umin_value &= mask;
3477 		reg->umax_value &= mask;
3478 	} else {
3479 		reg->umin_value = 0;
3480 		reg->umax_value = mask;
3481 	}
3482 	reg->smin_value = reg->umin_value;
3483 	reg->smax_value = reg->umax_value;
3484 
3485 	/* If size is smaller than 32bit register the 32bit register
3486 	 * values are also truncated so we push 64-bit bounds into
3487 	 * 32-bit bounds. Above were truncated < 32-bits already.
3488 	 */
3489 	if (size >= 4)
3490 		return;
3491 	__reg_combine_64_into_32(reg);
3492 }
3493 
3494 static bool bpf_map_is_rdonly(const struct bpf_map *map)
3495 {
3496 	/* A map is considered read-only if the following conditions are true:
3497 	 *
3498 	 * 1) BPF program side cannot change any of the map content. The
3499 	 *    BPF_F_RDONLY_PROG flag was set at map creation time and stays
3500 	 *    set throughout the map's lifetime.
3501 	 * 2) The map value(s) have been initialized from user space by a
3502 	 *    loader and then "frozen", such that no new map update/delete
3503 	 *    operations from syscall side are possible for the rest of
3504 	 *    the map's lifetime from that point onwards.
3505 	 * 3) Any parallel/pending map update/delete operations from syscall
3506 	 *    side have been completed. Only after that point, it's safe to
3507 	 *    assume that map value(s) are immutable.
3508 	 */
3509 	return (map->map_flags & BPF_F_RDONLY_PROG) &&
3510 	       READ_ONCE(map->frozen) &&
3511 	       !bpf_map_write_active(map);
3512 }
3513 
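/* Read a value directly out of a map at a constant offset. Used below by
 * check_mem_access() for frozen read-only maps so the loaded value can be
 * tracked as a known scalar.
 */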
3514 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3515 {
3516 	void *ptr;
3517 	u64 addr;
3518 	int err;
3519 
3520 	err = map->ops->map_direct_value_addr(map, &addr, off);
3521 	if (err)
3522 		return err;
3523 	ptr = (void *)(long)addr + off;
3524 
3525 	switch (size) {
3526 	case sizeof(u8):
3527 		*val = (u64)*(u8 *)ptr;
3528 		break;
3529 	case sizeof(u16):
3530 		*val = (u64)*(u16 *)ptr;
3531 		break;
3532 	case sizeof(u32):
3533 		*val = (u64)*(u32 *)ptr;
3534 		break;
3535 	case sizeof(u64):
3536 		*val = *(u64 *)ptr;
3537 		break;
3538 	default:
3539 		return -EINVAL;
3540 	}
3541 	return 0;
3542 }
3543 
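/* Validate a load/store through a PTR_TO_BTF_ID register by walking the BTF
 * description of the pointed-to kernel structure. Program types may override
 * the walk via ops->btf_struct_access; otherwise only reads are allowed.
 */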
3544 static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
3545 				   struct bpf_reg_state *regs,
3546 				   int regno, int off, int size,
3547 				   enum bpf_access_type atype,
3548 				   int value_regno)
3549 {
3550 	struct bpf_reg_state *reg = regs + regno;
3551 	const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3552 	const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3553 	u32 btf_id;
3554 	int ret;
3555 
3556 	if (off < 0) {
3557 		verbose(env,
3558 			"R%d is ptr_%s invalid negative access: off=%d\n",
3559 			regno, tname, off);
3560 		return -EACCES;
3561 	}
3562 	if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3563 		char tn_buf[48];
3564 
3565 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3566 		verbose(env,
3567 			"R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n",
3568 			regno, tname, off, tn_buf);
3569 		return -EACCES;
3570 	}
3571 
3572 	if (env->ops->btf_struct_access) {
3573 		ret = env->ops->btf_struct_access(&env->log, t, off, size,
3574 						  atype, &btf_id);
3575 	} else {
3576 		if (atype != BPF_READ) {
3577 			verbose(env, "only read is supported\n");
3578 			return -EACCES;
3579 		}
3580 
3581 		ret = btf_struct_access(&env->log, t, off, size, atype,
3582 					&btf_id);
3583 	}
3584 
3585 	if (ret < 0)
3586 		return ret;
3587 
3588 	if (atype == BPF_READ && value_regno >= 0)
3589 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3590 
3591 	return 0;
3592 }
3593 
3594 static int check_ptr_to_map_access(struct bpf_verifier_env *env,
3595 				   struct bpf_reg_state *regs,
3596 				   int regno, int off, int size,
3597 				   enum bpf_access_type atype,
3598 				   int value_regno)
3599 {
3600 	struct bpf_reg_state *reg = regs + regno;
3601 	struct bpf_map *map = reg->map_ptr;
3602 	const struct btf_type *t;
3603 	const char *tname;
3604 	u32 btf_id;
3605 	int ret;
3606 
3607 	if (!btf_vmlinux) {
3608 		verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3609 		return -ENOTSUPP;
3610 	}
3611 
3612 	if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3613 		verbose(env, "map_ptr access not supported for map type %d\n",
3614 			map->map_type);
3615 		return -ENOTSUPP;
3616 	}
3617 
3618 	t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3619 	tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3620 
3621 	if (!env->allow_ptr_to_map_access) {
3622 		verbose(env,
3623 			"%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n",
3624 			tname);
3625 		return -EPERM;
3626 	}
3627 
3628 	if (off < 0) {
3629 		verbose(env, "R%d is %s invalid negative access: off=%d\n",
3630 			regno, tname, off);
3631 		return -EACCES;
3632 	}
3633 
3634 	if (atype != BPF_READ) {
3635 		verbose(env, "only read from %s is supported\n", tname);
3636 		return -EACCES;
3637 	}
3638 
3639 	ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3640 	if (ret < 0)
3641 		return ret;
3642 
3643 	if (value_regno >= 0)
3644 		mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3645 
3646 	return 0;
3647 }
3648 
3649 /* Check that the stack access at the given offset is within bounds. The
3650  * maximum valid offset is -1.
3651  *
3652  * The minimum valid offset is -MAX_BPF_STACK for writes, and
3653  * -state->allocated_stack for reads.
3654  */
3655 static int check_stack_slot_within_bounds(int off,
3656 					  struct bpf_func_state *state,
3657 					  enum bpf_access_type t)
3658 {
3659 	int min_valid_off;
3660 
3661 	if (t == BPF_WRITE)
3662 		min_valid_off = -MAX_BPF_STACK;
3663 	else
3664 		min_valid_off = -state->allocated_stack;
3665 
3666 	if (off < min_valid_off || off > -1)
3667 		return -EACCES;
3668 	return 0;
3669 }
3670 
3671 /* Check that the stack access at 'regno + off' falls within the maximum stack
3672  * bounds.
3673  *
3674  * 'off' includes `regno->off`, but not its dynamic part (if any).
3675  */
3676 static int check_stack_access_within_bounds(
3677 		struct bpf_verifier_env *env,
3678 		int regno, int off, int access_size,
3679 		enum stack_access_src src, enum bpf_access_type type)
3680 {
3681 	struct bpf_reg_state *regs = cur_regs(env);
3682 	struct bpf_reg_state *reg = regs + regno;
3683 	struct bpf_func_state *state = func(env, reg);
3684 	int min_off, max_off;
3685 	int err;
3686 	char *err_extra;
3687 
3688 	if (src == ACCESS_HELPER)
3689 		/* We don't know if helpers are reading or writing (or both). */
3690 		err_extra = " indirect access to";
3691 	else if (type == BPF_READ)
3692 		err_extra = " read from";
3693 	else
3694 		err_extra = " write to";
3695 
3696 	if (tnum_is_const(reg->var_off)) {
3697 		min_off = reg->var_off.value + off;
3698 		if (access_size > 0)
3699 			max_off = min_off + access_size - 1;
3700 		else
3701 			max_off = min_off;
3702 	} else {
3703 		if (reg->smax_value >= BPF_MAX_VAR_OFF ||
3704 		    reg->smin_value <= -BPF_MAX_VAR_OFF) {
3705 			verbose(env, "invalid unbounded variable-offset%s stack R%d\n",
3706 				err_extra, regno);
3707 			return -EACCES;
3708 		}
3709 		min_off = reg->smin_value + off;
3710 		if (access_size > 0)
3711 			max_off = reg->smax_value + off + access_size - 1;
3712 		else
3713 			max_off = min_off;
3714 	}
3715 
3716 	err = check_stack_slot_within_bounds(min_off, state, type);
3717 	if (!err)
3718 		err = check_stack_slot_within_bounds(max_off, state, type);
3719 
3720 	if (err) {
3721 		if (tnum_is_const(reg->var_off)) {
3722 			verbose(env, "invalid%s stack R%d off=%d size=%d\n",
3723 				err_extra, regno, off, access_size);
3724 		} else {
3725 			char tn_buf[48];
3726 
3727 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3728 			verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n",
3729 				err_extra, regno, tn_buf, access_size);
3730 		}
3731 	}
3732 	return err;
3733 }
3734 
3735 /* check whether memory at (regno + off) is accessible for t = (read | write)
3736  * if t==write, value_regno is a register whose value is stored into memory
3737  * if t==read, value_regno is a register which will receive the value from memory
3738  * if t==write && value_regno==-1, some unknown value is stored into memory
3739  * if t==read && value_regno==-1, don't care what we read from memory
3740  */
3741 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno,
3742 			    int off, int bpf_size, enum bpf_access_type t,
3743 			    int value_regno, bool strict_alignment_once)
3744 {
3745 	struct bpf_reg_state *regs = cur_regs(env);
3746 	struct bpf_reg_state *reg = regs + regno;
3747 	struct bpf_func_state *state;
3748 	int size, err = 0;
3749 
3750 	size = bpf_size_to_bytes(bpf_size);
3751 	if (size < 0)
3752 		return size;
3753 
3754 	/* alignment checks will add in reg->off themselves */
3755 	err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3756 	if (err)
3757 		return err;
3758 
3759 	/* for access checks, reg->off is just part of off */
3760 	off += reg->off;
3761 
3762 	if (reg->type == PTR_TO_MAP_VALUE) {
3763 		if (t == BPF_WRITE && value_regno >= 0 &&
3764 		    is_pointer_value(env, value_regno)) {
3765 			verbose(env, "R%d leaks addr into map\n", value_regno);
3766 			return -EACCES;
3767 		}
3768 		err = check_map_access_type(env, regno, off, size, t);
3769 		if (err)
3770 			return err;
3771 		err = check_map_access(env, regno, off, size, false);
3772 		if (!err && t == BPF_READ && value_regno >= 0) {
3773 			struct bpf_map *map = reg->map_ptr;
3774 
3775 			/* if map is read-only, track its contents as scalars */
3776 			if (tnum_is_const(reg->var_off) &&
3777 			    bpf_map_is_rdonly(map) &&
3778 			    map->ops->map_direct_value_addr) {
3779 				int map_off = off + reg->var_off.value;
3780 				u64 val = 0;
3781 
3782 				err = bpf_map_direct_read(map, map_off, size,
3783 							  &val);
3784 				if (err)
3785 					return err;
3786 
3787 				regs[value_regno].type = SCALAR_VALUE;
3788 				__mark_reg_known(&regs[value_regno], val);
3789 			} else {
3790 				mark_reg_unknown(env, regs, value_regno);
3791 			}
3792 		}
3793 	} else if (reg->type == PTR_TO_MEM) {
3794 		if (t == BPF_WRITE && value_regno >= 0 &&
3795 		    is_pointer_value(env, value_regno)) {
3796 			verbose(env, "R%d leaks addr into mem\n", value_regno);
3797 			return -EACCES;
3798 		}
3799 		err = check_mem_region_access(env, regno, off, size,
3800 					      reg->mem_size, false);
3801 		if (!err && t == BPF_READ && value_regno >= 0)
3802 			mark_reg_unknown(env, regs, value_regno);
3803 	} else if (reg->type == PTR_TO_CTX) {
3804 		enum bpf_reg_type reg_type = SCALAR_VALUE;
3805 		u32 btf_id = 0;
3806 
3807 		if (t == BPF_WRITE && value_regno >= 0 &&
3808 		    is_pointer_value(env, value_regno)) {
3809 			verbose(env, "R%d leaks addr into ctx\n", value_regno);
3810 			return -EACCES;
3811 		}
3812 
3813 		err = check_ctx_reg(env, reg, regno);
3814 		if (err < 0)
3815 			return err;
3816 
3817 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3818 		if (err)
3819 			verbose_linfo(env, insn_idx, "; ");
3820 		if (!err && t == BPF_READ && value_regno >= 0) {
3821 			/* ctx access returns either a scalar, or a
3822 			 * PTR_TO_PACKET[_META,_END]. In the latter
3823 			 * case, we know the offset is zero.
3824 			 */
3825 			if (reg_type == SCALAR_VALUE) {
3826 				mark_reg_unknown(env, regs, value_regno);
3827 			} else {
3828 				mark_reg_known_zero(env, regs,
3829 						    value_regno);
3830 				if (reg_type_may_be_null(reg_type))
3831 					regs[value_regno].id = ++env->id_gen;
3832 				/* A load of ctx field could have different
3833 				 * actual load size with the one encoded in the
3834 				 * insn. When the dst is PTR, it is for sure not
3835 				 * a sub-register.
3836 				 */
3837 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3838 				if (reg_type == PTR_TO_BTF_ID ||
3839 				    reg_type == PTR_TO_BTF_ID_OR_NULL)
3840 					regs[value_regno].btf_id = btf_id;
3841 			}
3842 			regs[value_regno].type = reg_type;
3843 		}
3844 
3845 	} else if (reg->type == PTR_TO_STACK) {
3846 		/* Basic bounds checks. */
3847 		err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3848 		if (err)
3849 			return err;
3850 
3851 		state = func(env, reg);
3852 		err = update_stack_depth(env, state, off);
3853 		if (err)
3854 			return err;
3855 
3856 		if (t == BPF_READ)
3857 			err = check_stack_read(env, regno, off, size,
3858 					       value_regno);
3859 		else
3860 			err = check_stack_write(env, regno, off, size,
3861 						value_regno, insn_idx);
3862 	} else if (reg_is_pkt_pointer(reg)) {
3863 		if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3864 			verbose(env, "cannot write into packet\n");
3865 			return -EACCES;
3866 		}
3867 		if (t == BPF_WRITE && value_regno >= 0 &&
3868 		    is_pointer_value(env, value_regno)) {
3869 			verbose(env, "R%d leaks addr into packet\n",
3870 				value_regno);
3871 			return -EACCES;
3872 		}
3873 		err = check_packet_access(env, regno, off, size, false);
3874 		if (!err && t == BPF_READ && value_regno >= 0)
3875 			mark_reg_unknown(env, regs, value_regno);
3876 	} else if (reg->type == PTR_TO_FLOW_KEYS) {
3877 		if (t == BPF_WRITE && value_regno >= 0 &&
3878 		    is_pointer_value(env, value_regno)) {
3879 			verbose(env, "R%d leaks addr into flow keys\n",
3880 				value_regno);
3881 			return -EACCES;
3882 		}
3883 
3884 		err = check_flow_keys_access(env, off, size);
3885 		if (!err && t == BPF_READ && value_regno >= 0)
3886 			mark_reg_unknown(env, regs, value_regno);
3887 	} else if (type_is_sk_pointer(reg->type)) {
3888 		if (t == BPF_WRITE) {
3889 			verbose(env, "R%d cannot write into %s\n",
3890 				regno, reg_type_str[reg->type]);
3891 			return -EACCES;
3892 		}
3893 		err = check_sock_access(env, insn_idx, regno, off, size, t);
3894 		if (!err && value_regno >= 0)
3895 			mark_reg_unknown(env, regs, value_regno);
3896 	} else if (reg->type == PTR_TO_TP_BUFFER) {
3897 		err = check_tp_buffer_access(env, reg, regno, off, size);
3898 		if (!err && t == BPF_READ && value_regno >= 0)
3899 			mark_reg_unknown(env, regs, value_regno);
3900 	} else if (reg->type == PTR_TO_BTF_ID) {
3901 		err = check_ptr_to_btf_access(env, regs, regno, off, size, t,
3902 					      value_regno);
3903 	} else if (reg->type == CONST_PTR_TO_MAP) {
3904 		err = check_ptr_to_map_access(env, regs, regno, off, size, t,
3905 					      value_regno);
3906 	} else if (reg->type == PTR_TO_RDONLY_BUF) {
3907 		if (t == BPF_WRITE) {
3908 			verbose(env, "R%d cannot write into %s\n",
3909 				regno, reg_type_str[reg->type]);
3910 			return -EACCES;
3911 		}
3912 		err = check_buffer_access(env, reg, regno, off, size, false,
3913 					  "rdonly",
3914 					  &env->prog->aux->max_rdonly_access);
3915 		if (!err && value_regno >= 0)
3916 			mark_reg_unknown(env, regs, value_regno);
3917 	} else if (reg->type == PTR_TO_RDWR_BUF) {
3918 		err = check_buffer_access(env, reg, regno, off, size, false,
3919 					  "rdwr",
3920 					  &env->prog->aux->max_rdwr_access);
3921 		if (!err && t == BPF_READ && value_regno >= 0)
3922 			mark_reg_unknown(env, regs, value_regno);
3923 	} else {
3924 		verbose(env, "R%d invalid mem access '%s'\n", regno,
3925 			reg_type_str[reg->type]);
3926 		return -EACCES;
3927 	}
3928 
3929 	if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ &&
3930 	    regs[value_regno].type == SCALAR_VALUE) {
3931 		/* b/h/w load zero-extends, mark upper bits as known 0 */
3932 		coerce_reg_to_size(&regs[value_regno], size);
3933 	}
3934 	return err;
3935 }
3936 
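/* BPF_XADD performs an atomic '*(dst_reg + off) += src_reg' without returning
 * the old value. Since it both reads and writes memory, the destination is
 * checked for both access types, and ctx, packet, flow-keys and socket
 * pointers are rejected as targets.
 */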
3937 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3938 {
3939 	int err;
3940 
3941 	if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
3942 	    insn->imm != 0) {
3943 		verbose(env, "BPF_XADD uses reserved fields\n");
3944 		return -EINVAL;
3945 	}
3946 
3947 	/* check src1 operand */
3948 	err = check_reg_arg(env, insn->src_reg, SRC_OP);
3949 	if (err)
3950 		return err;
3951 
3952 	/* check src2 operand */
3953 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3954 	if (err)
3955 		return err;
3956 
3957 	if (is_pointer_value(env, insn->src_reg)) {
3958 		verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3959 		return -EACCES;
3960 	}
3961 
3962 	if (is_ctx_reg(env, insn->dst_reg) ||
3963 	    is_pkt_reg(env, insn->dst_reg) ||
3964 	    is_flow_key_reg(env, insn->dst_reg) ||
3965 	    is_sk_reg(env, insn->dst_reg)) {
3966 		verbose(env, "BPF_XADD stores into R%d %s is not allowed\n",
3967 			insn->dst_reg,
3968 			reg_type_str[reg_state(env, insn->dst_reg)->type]);
3969 		return -EACCES;
3970 	}
3971 
3972 	/* check whether atomic_add can read the memory */
3973 	err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3974 			       BPF_SIZE(insn->code), BPF_READ, -1, true);
3975 	if (err)
3976 		return err;
3977 
3978 	/* check whether atomic_add can write into the same memory */
3979 	return check_mem_access(env, insn_idx, insn->dst_reg, insn->off,
3980 				BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3981 }
3982 
3983 /* When register 'regno' is used to read the stack (either directly or through
3984  * a helper function) make sure that it's within stack boundary and, depending
3985  * on the access type, that all elements of the stack are initialized.
3986  *
3987  * 'off' includes 'regno->off', but not its dynamic part (if any).
3988  *
3989  * All registers that have been spilled on the stack in the slots within the
3990  * read offsets are marked as read.
3991  */
3992 static int check_stack_range_initialized(
3993 		struct bpf_verifier_env *env, int regno, int off,
3994 		int access_size, bool zero_size_allowed,
3995 		enum stack_access_src type, struct bpf_call_arg_meta *meta)
3996 {
3997 	struct bpf_reg_state *reg = reg_state(env, regno);
3998 	struct bpf_func_state *state = func(env, reg);
3999 	int err, min_off, max_off, i, j, slot, spi;
4000 	char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
4001 	enum bpf_access_type bounds_check_type;
4002 	/* Some accesses can write anything into the stack, others are
4003 	 * read-only.
4004 	 */
4005 	bool clobber = false;
4006 
4007 	if (access_size == 0 && !zero_size_allowed) {
4008 		verbose(env, "invalid zero-sized read\n");
4009 		return -EACCES;
4010 	}
4011 
4012 	if (type == ACCESS_HELPER) {
4013 		/* The bounds checks for writes are more permissive than for
4014 		 * reads. However, if raw_mode is not set, we'll do extra
4015 		 * checks below.
4016 		 */
4017 		bounds_check_type = BPF_WRITE;
4018 		clobber = true;
4019 	} else {
4020 		bounds_check_type = BPF_READ;
4021 	}
4022 	err = check_stack_access_within_bounds(env, regno, off, access_size,
4023 					       type, bounds_check_type);
4024 	if (err)
4025 		return err;
4026 
4027 
4028 	if (tnum_is_const(reg->var_off)) {
4029 		min_off = max_off = reg->var_off.value + off;
4030 	} else {
4031 		/* Variable offset is prohibited for unprivileged mode for
4032 		 * simplicity since it requires corresponding support in
4033 		 * Spectre masking for stack ALU.
4034 		 * See also retrieve_ptr_limit().
4035 		 */
4036 		if (!env->bypass_spec_v1) {
4037 			char tn_buf[48];
4038 
4039 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4040 			verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n",
4041 				regno, err_extra, tn_buf);
4042 			return -EACCES;
4043 		}
4044 		/* Only an initialized buffer on the stack is allowed to be accessed
4045 		 * with a variable offset. With an uninitialized buffer it's hard to
4046 		 * guarantee that the whole memory is marked as initialized on
4047 		 * helper return since the specific bounds are unknown, which may
4048 		 * cause uninitialized stack data to leak.
4049 		 */
4050 		if (meta && meta->raw_mode)
4051 			meta = NULL;
4052 
4053 		min_off = reg->smin_value + off;
4054 		max_off = reg->smax_value + off;
4055 	}
4056 
4057 	if (meta && meta->raw_mode) {
4058 		meta->access_size = access_size;
4059 		meta->regno = regno;
4060 		return 0;
4061 	}
4062 
4063 	for (i = min_off; i < max_off + access_size; i++) {
4064 		u8 *stype;
4065 
4066 		slot = -i - 1;
4067 		spi = slot / BPF_REG_SIZE;
4068 		if (state->allocated_stack <= slot)
4069 			goto err;
4070 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4071 		if (*stype == STACK_MISC)
4072 			goto mark;
4073 		if (*stype == STACK_ZERO) {
4074 			if (clobber) {
4075 				/* helper can write anything into the stack */
4076 				*stype = STACK_MISC;
4077 			}
4078 			goto mark;
4079 		}
4080 
4081 		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4082 		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
4083 			goto mark;
4084 
4085 		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4086 		    (state->stack[spi].spilled_ptr.type == SCALAR_VALUE ||
4087 		     env->allow_ptr_leaks)) {
4088 			if (clobber) {
4089 				__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4090 				for (j = 0; j < BPF_REG_SIZE; j++)
4091 					state->stack[spi].slot_type[j] = STACK_MISC;
4092 			}
4093 			goto mark;
4094 		}
4095 
4096 err:
4097 		if (tnum_is_const(reg->var_off)) {
4098 			verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n",
4099 				err_extra, regno, min_off, i - min_off, access_size);
4100 		} else {
4101 			char tn_buf[48];
4102 
4103 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4104 			verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n",
4105 				err_extra, regno, tn_buf, i - min_off, access_size);
4106 		}
4107 		return -EACCES;
4108 mark:
4109 		/* reading any byte out of 8-byte 'spill_slot' will cause
4110 		 * the whole slot to be marked as 'read'
4111 		 */
4112 		mark_reg_read(env, &state->stack[spi].spilled_ptr,
4113 			      state->stack[spi].spilled_ptr.parent,
4114 			      REG_LIVE_READ64);
4115 	}
4116 	return update_stack_depth(env, state, min_off);
4117 }
4118 
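/* Dispatch the check of a helper's memory argument ('access_size' bytes
 * starting at reg->off) to the bounds check that matches the pointer type the
 * program passed in.
 */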
4119 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
4120 				   int access_size, bool zero_size_allowed,
4121 				   struct bpf_call_arg_meta *meta)
4122 {
4123 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4124 
4125 	switch (reg->type) {
4126 	case PTR_TO_PACKET:
4127 	case PTR_TO_PACKET_META:
4128 		return check_packet_access(env, regno, reg->off, access_size,
4129 					   zero_size_allowed);
4130 	case PTR_TO_MAP_VALUE:
4131 		if (check_map_access_type(env, regno, reg->off, access_size,
4132 					  meta && meta->raw_mode ? BPF_WRITE :
4133 					  BPF_READ))
4134 			return -EACCES;
4135 		return check_map_access(env, regno, reg->off, access_size,
4136 					zero_size_allowed);
4137 	case PTR_TO_MEM:
4138 		return check_mem_region_access(env, regno, reg->off,
4139 					       access_size, reg->mem_size,
4140 					       zero_size_allowed);
4141 	case PTR_TO_RDONLY_BUF:
4142 		if (meta && meta->raw_mode)
4143 			return -EACCES;
4144 		return check_buffer_access(env, reg, regno, reg->off,
4145 					   access_size, zero_size_allowed,
4146 					   "rdonly",
4147 					   &env->prog->aux->max_rdonly_access);
4148 	case PTR_TO_RDWR_BUF:
4149 		return check_buffer_access(env, reg, regno, reg->off,
4150 					   access_size, zero_size_allowed,
4151 					   "rdwr",
4152 					   &env->prog->aux->max_rdwr_access);
4153 	case PTR_TO_STACK:
4154 		return check_stack_range_initialized(
4155 				env,
4156 				regno, reg->off, access_size,
4157 				zero_size_allowed, ACCESS_HELPER, meta);
4158 	default: /* scalar_value or invalid ptr */
4159 		/* Allow zero-byte read from NULL, regardless of pointer type */
4160 		if (zero_size_allowed && access_size == 0 &&
4161 		    register_is_null(reg))
4162 			return 0;
4163 
4164 		verbose(env, "R%d type=%s expected=%s\n", regno,
4165 			reg_type_str[reg->type],
4166 			reg_type_str[PTR_TO_STACK]);
4167 		return -EACCES;
4168 	}
4169 }
4170 
4171 /* Implementation details:
4172  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4173  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4174  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4175  * value_or_null->value transition, since the verifier only cares about
4176  * the range of access to valid map value pointer and doesn't care about actual
4177  * address of the map element.
4178  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4179  * reg->id > 0 after value_or_null->value transition. By doing so
4180  * two bpf_map_lookups will be considered two different pointers that
4181  * point to different bpf_spin_locks.
4182  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4183  * dead-locks.
4184  * Since only one bpf_spin_lock is allowed the checks are simpler than
4185  * reg_is_refcounted() logic. The verifier needs to remember only
4186  * one spin_lock instead of array of acquired_refs.
4187  * cur_state->active_spin_lock remembers which map value element got locked
4188  * and clears it after bpf_spin_unlock.
4189  */
4190 static int process_spin_lock(struct bpf_verifier_env *env, int regno,
4191 			     bool is_lock)
4192 {
4193 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4194 	struct bpf_verifier_state *cur = env->cur_state;
4195 	bool is_const = tnum_is_const(reg->var_off);
4196 	struct bpf_map *map = reg->map_ptr;
4197 	u64 val = reg->var_off.value;
4198 
4199 	if (!is_const) {
4200 		verbose(env,
4201 			"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
4202 			regno);
4203 		return -EINVAL;
4204 	}
4205 	if (!map->btf) {
4206 		verbose(env,
4207 			"map '%s' has to have BTF in order to use bpf_spin_lock\n",
4208 			map->name);
4209 		return -EINVAL;
4210 	}
4211 	if (!map_value_has_spin_lock(map)) {
4212 		if (map->spin_lock_off == -E2BIG)
4213 			verbose(env,
4214 				"map '%s' has more than one 'struct bpf_spin_lock'\n",
4215 				map->name);
4216 		else if (map->spin_lock_off == -ENOENT)
4217 			verbose(env,
4218 				"map '%s' doesn't have 'struct bpf_spin_lock'\n",
4219 				map->name);
4220 		else
4221 			verbose(env,
4222 				"map '%s' is not a struct type or bpf_spin_lock is mangled\n",
4223 				map->name);
4224 		return -EINVAL;
4225 	}
4226 	if (map->spin_lock_off != val + reg->off) {
4227 		verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n",
4228 			val + reg->off);
4229 		return -EINVAL;
4230 	}
4231 	if (is_lock) {
4232 		if (cur->active_spin_lock) {
4233 			verbose(env,
4234 				"Locking two bpf_spin_locks are not allowed\n");
4235 			return -EINVAL;
4236 		}
4237 		cur->active_spin_lock = reg->id;
4238 	} else {
4239 		if (!cur->active_spin_lock) {
4240 			verbose(env, "bpf_spin_unlock without taking a lock\n");
4241 			return -EINVAL;
4242 		}
4243 		if (cur->active_spin_lock != reg->id) {
4244 			verbose(env, "bpf_spin_unlock of different lock\n");
4245 			return -EINVAL;
4246 		}
4247 		cur->active_spin_lock = 0;
4248 	}
4249 	return 0;
4250 }
4251 
4252 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4253 {
4254 	return type == ARG_PTR_TO_MEM ||
4255 	       type == ARG_PTR_TO_MEM_OR_NULL ||
4256 	       type == ARG_PTR_TO_UNINIT_MEM;
4257 }
4258 
4259 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4260 {
4261 	return type == ARG_CONST_SIZE ||
4262 	       type == ARG_CONST_SIZE_OR_ZERO;
4263 }
4264 
4265 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4266 {
4267 	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4268 }
4269 
4270 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4271 {
4272 	return type == ARG_PTR_TO_INT ||
4273 	       type == ARG_PTR_TO_LONG;
4274 }
4275 
4276 static int int_ptr_type_to_size(enum bpf_arg_type type)
4277 {
4278 	if (type == ARG_PTR_TO_INT)
4279 		return sizeof(u32);
4280 	else if (type == ARG_PTR_TO_LONG)
4281 		return sizeof(u64);
4282 
4283 	return -EINVAL;
4284 }
4285 
4286 static int resolve_map_arg_type(struct bpf_verifier_env *env,
4287 				 const struct bpf_call_arg_meta *meta,
4288 				 enum bpf_arg_type *arg_type)
4289 {
4290 	if (!meta->map_ptr) {
4291 		/* kernel subsystem misconfigured verifier */
4292 		verbose(env, "invalid map_ptr to access map->type\n");
4293 		return -EACCES;
4294 	}
4295 
4296 	switch (meta->map_ptr->map_type) {
4297 	case BPF_MAP_TYPE_SOCKMAP:
4298 	case BPF_MAP_TYPE_SOCKHASH:
4299 		if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4300 			*arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4301 		} else {
4302 			verbose(env, "invalid arg_type for sockmap/sockhash\n");
4303 			return -EINVAL;
4304 		}
4305 		break;
4306 
4307 	default:
4308 		break;
4309 	}
4310 	return 0;
4311 }
4312 
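/* Set of register types acceptable for a given helper argument type. Unused
 * trailing entries of 'types' stay zero-initialized, i.e. NOT_INIT, which
 * terminates the scan in check_reg_type().
 */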
4313 struct bpf_reg_types {
4314 	const enum bpf_reg_type types[10];
4315 	u32 *btf_id;
4316 };
4317 
4318 static const struct bpf_reg_types map_key_value_types = {
4319 	.types = {
4320 		PTR_TO_STACK,
4321 		PTR_TO_PACKET,
4322 		PTR_TO_PACKET_META,
4323 		PTR_TO_MAP_VALUE,
4324 	},
4325 };
4326 
4327 static const struct bpf_reg_types sock_types = {
4328 	.types = {
4329 		PTR_TO_SOCK_COMMON,
4330 		PTR_TO_SOCKET,
4331 		PTR_TO_TCP_SOCK,
4332 		PTR_TO_XDP_SOCK,
4333 	},
4334 };
4335 
4336 #ifdef CONFIG_NET
4337 static const struct bpf_reg_types btf_id_sock_common_types = {
4338 	.types = {
4339 		PTR_TO_SOCK_COMMON,
4340 		PTR_TO_SOCKET,
4341 		PTR_TO_TCP_SOCK,
4342 		PTR_TO_XDP_SOCK,
4343 		PTR_TO_BTF_ID,
4344 	},
4345 	.btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4346 };
4347 #endif
4348 
4349 static const struct bpf_reg_types mem_types = {
4350 	.types = {
4351 		PTR_TO_STACK,
4352 		PTR_TO_PACKET,
4353 		PTR_TO_PACKET_META,
4354 		PTR_TO_MAP_VALUE,
4355 		PTR_TO_MEM,
4356 		PTR_TO_RDONLY_BUF,
4357 		PTR_TO_RDWR_BUF,
4358 	},
4359 };
4360 
4361 static const struct bpf_reg_types int_ptr_types = {
4362 	.types = {
4363 		PTR_TO_STACK,
4364 		PTR_TO_PACKET,
4365 		PTR_TO_PACKET_META,
4366 		PTR_TO_MAP_VALUE,
4367 	},
4368 };
4369 
4370 static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
4371 static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
4372 static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
4373 static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
4374 static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
4375 static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
4376 static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
4377 static const struct bpf_reg_types percpu_btf_ptr_types = { .types = { PTR_TO_PERCPU_BTF_ID } };
4378 
4379 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4380 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
4381 	[ARG_PTR_TO_MAP_VALUE]		= &map_key_value_types,
4382 	[ARG_PTR_TO_UNINIT_MAP_VALUE]	= &map_key_value_types,
4383 	[ARG_PTR_TO_MAP_VALUE_OR_NULL]	= &map_key_value_types,
4384 	[ARG_CONST_SIZE]		= &scalar_types,
4385 	[ARG_CONST_SIZE_OR_ZERO]	= &scalar_types,
4386 	[ARG_CONST_ALLOC_SIZE_OR_ZERO]	= &scalar_types,
4387 	[ARG_CONST_MAP_PTR]		= &const_map_ptr_types,
4388 	[ARG_PTR_TO_CTX]		= &context_types,
4389 	[ARG_PTR_TO_CTX_OR_NULL]	= &context_types,
4390 	[ARG_PTR_TO_SOCK_COMMON]	= &sock_types,
4391 #ifdef CONFIG_NET
4392 	[ARG_PTR_TO_BTF_ID_SOCK_COMMON]	= &btf_id_sock_common_types,
4393 #endif
4394 	[ARG_PTR_TO_SOCKET]		= &fullsock_types,
4395 	[ARG_PTR_TO_SOCKET_OR_NULL]	= &fullsock_types,
4396 	[ARG_PTR_TO_BTF_ID]		= &btf_ptr_types,
4397 	[ARG_PTR_TO_SPIN_LOCK]		= &spin_lock_types,
4398 	[ARG_PTR_TO_MEM]		= &mem_types,
4399 	[ARG_PTR_TO_MEM_OR_NULL]	= &mem_types,
4400 	[ARG_PTR_TO_UNINIT_MEM]		= &mem_types,
4401 	[ARG_PTR_TO_ALLOC_MEM]		= &alloc_mem_types,
4402 	[ARG_PTR_TO_ALLOC_MEM_OR_NULL]	= &alloc_mem_types,
4403 	[ARG_PTR_TO_INT]		= &int_ptr_types,
4404 	[ARG_PTR_TO_LONG]		= &int_ptr_types,
4405 	[ARG_PTR_TO_PERCPU_BTF_ID]	= &percpu_btf_ptr_types,
4406 };
4407 
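/* Check that register 'regno', passed as a helper argument, has one of the
 * register types listed for 'arg_type' in compatible_reg_types. For
 * PTR_TO_BTF_ID arguments the register's BTF type must also match the
 * expected BTF ID.
 */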
4408 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
4409 			  enum bpf_arg_type arg_type,
4410 			  const u32 *arg_btf_id)
4411 {
4412 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4413 	enum bpf_reg_type expected, type = reg->type;
4414 	const struct bpf_reg_types *compatible;
4415 	int i, j;
4416 
4417 	compatible = compatible_reg_types[arg_type];
4418 	if (!compatible) {
4419 		verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4420 		return -EFAULT;
4421 	}
4422 
4423 	for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4424 		expected = compatible->types[i];
4425 		if (expected == NOT_INIT)
4426 			break;
4427 
4428 		if (type == expected)
4429 			goto found;
4430 	}
4431 
4432 	verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
4433 	for (j = 0; j + 1 < i; j++)
4434 		verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
4435 	verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
4436 	return -EACCES;
4437 
4438 found:
4439 	if (type == PTR_TO_BTF_ID) {
4440 		if (!arg_btf_id) {
4441 			if (!compatible->btf_id) {
4442 				verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4443 				return -EFAULT;
4444 			}
4445 			arg_btf_id = compatible->btf_id;
4446 		}
4447 
4448 		if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
4449 					  *arg_btf_id)) {
4450 			verbose(env, "R%d is of type %s but %s is expected\n",
4451 				regno, kernel_type_name(reg->btf_id),
4452 				kernel_type_name(*arg_btf_id));
4453 			return -EACCES;
4454 		}
4455 
4456 		if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
4457 			verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
4458 				regno);
4459 			return -EACCES;
4460 		}
4461 	}
4462 
4463 	return 0;
4464 }
4465 
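/* Verify a single helper argument: register BPF_REG_1 + 'arg' must satisfy
 * the bpf_func_proto's arg_type. Map keys, map values and mem/size pairs are
 * additionally range checked via check_helper_mem_access().
 */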
4466 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
4467 			  struct bpf_call_arg_meta *meta,
4468 			  const struct bpf_func_proto *fn)
4469 {
4470 	u32 regno = BPF_REG_1 + arg;
4471 	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4472 	enum bpf_arg_type arg_type = fn->arg_type[arg];
4473 	enum bpf_reg_type type = reg->type;
4474 	int err = 0;
4475 
4476 	if (arg_type == ARG_DONTCARE)
4477 		return 0;
4478 
4479 	err = check_reg_arg(env, regno, SRC_OP);
4480 	if (err)
4481 		return err;
4482 
4483 	if (arg_type == ARG_ANYTHING) {
4484 		if (is_pointer_value(env, regno)) {
4485 			verbose(env, "R%d leaks addr into helper function\n",
4486 				regno);
4487 			return -EACCES;
4488 		}
4489 		return 0;
4490 	}
4491 
4492 	if (type_is_pkt_pointer(type) &&
4493 	    !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4494 		verbose(env, "helper access to the packet is not allowed\n");
4495 		return -EACCES;
4496 	}
4497 
4498 	if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4499 	    arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
4500 	    arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
4501 		err = resolve_map_arg_type(env, meta, &arg_type);
4502 		if (err)
4503 			return err;
4504 	}
4505 
4506 	if (register_is_null(reg) && arg_type_may_be_null(arg_type))
4507 		/* A NULL register has a SCALAR_VALUE type, so skip
4508 		 * type checking.
4509 		 */
4510 		goto skip_type_check;
4511 
4512 	err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4513 	if (err)
4514 		return err;
4515 
4516 	if (type == PTR_TO_CTX) {
4517 		err = check_ctx_reg(env, reg, regno);
4518 		if (err < 0)
4519 			return err;
4520 	}
4521 
4522 skip_type_check:
4523 	if (reg->ref_obj_id) {
4524 		if (meta->ref_obj_id) {
4525 			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
4526 				regno, reg->ref_obj_id,
4527 				meta->ref_obj_id);
4528 			return -EFAULT;
4529 		}
4530 		meta->ref_obj_id = reg->ref_obj_id;
4531 	}
4532 
4533 	if (arg_type == ARG_CONST_MAP_PTR) {
4534 		/* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4535 		meta->map_ptr = reg->map_ptr;
4536 	} else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4537 		/* bpf_map_xxx(..., map_ptr, ..., key) call:
4538 		 * check that [key, key + map->key_size) are within
4539 		 * stack limits and initialized
4540 		 */
4541 		if (!meta->map_ptr) {
4542 			/* in function declaration map_ptr must come before
4543 			 * map_key, so that it's verified and known before
4544 			 * we have to check map_key here. Otherwise it means
4545 			 * that kernel subsystem misconfigured verifier
4546 			 */
4547 			verbose(env, "invalid map_ptr to access map->key\n");
4548 			return -EACCES;
4549 		}
4550 		err = check_helper_mem_access(env, regno,
4551 					      meta->map_ptr->key_size, false,
4552 					      NULL);
4553 	} else if (arg_type == ARG_PTR_TO_MAP_VALUE ||
4554 		   (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL &&
4555 		    !register_is_null(reg)) ||
4556 		   arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4557 		/* bpf_map_xxx(..., map_ptr, ..., value) call:
4558 		 * check [value, value + map->value_size) validity
4559 		 */
4560 		if (!meta->map_ptr) {
4561 			/* kernel subsystem misconfigured verifier */
4562 			verbose(env, "invalid map_ptr to access map->value\n");
4563 			return -EACCES;
4564 		}
4565 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4566 		err = check_helper_mem_access(env, regno,
4567 					      meta->map_ptr->value_size, false,
4568 					      meta);
4569 	} else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4570 		if (!reg->btf_id) {
4571 			verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4572 			return -EACCES;
4573 		}
4574 		meta->ret_btf_id = reg->btf_id;
4575 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4576 		if (meta->func_id == BPF_FUNC_spin_lock) {
4577 			if (process_spin_lock(env, regno, true))
4578 				return -EACCES;
4579 		} else if (meta->func_id == BPF_FUNC_spin_unlock) {
4580 			if (process_spin_lock(env, regno, false))
4581 				return -EACCES;
4582 		} else {
4583 			verbose(env, "verifier internal error\n");
4584 			return -EFAULT;
4585 		}
4586 	} else if (arg_type_is_mem_ptr(arg_type)) {
4587 		/* The access to this pointer is only checked when we hit the
4588 		 * next is_mem_size argument below.
4589 		 */
4590 		meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4591 	} else if (arg_type_is_mem_size(arg_type)) {
4592 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4593 
4594 		/* This is used to refine r0 return value bounds for helpers
4595 		 * that enforce this value as an upper bound on return values.
4596 		 * See do_refine_retval_range() for helpers that can refine
4597 		 * the return value. The C type of the helper argument is u32, so we
4598 		 * pull the register bound from umax_value; if it is negative, the
4599 		 * verifier errors out. Only upper bounds can be learned because retval is an
4600 		 * int type and negative retvals are allowed.
4601 		 */
4602 		meta->msize_max_value = reg->umax_value;
4603 
4604 		/* The register is SCALAR_VALUE; the access check
4605 		 * happens using its boundaries.
4606 		 */
4607 		if (!tnum_is_const(reg->var_off))
4608 			/* For unprivileged variable accesses, disable raw
4609 			 * mode so that the program is required to
4610 			 * initialize all the memory that the helper could
4611 			 * just partially fill up.
4612 			 */
4613 			meta = NULL;
4614 
4615 		if (reg->smin_value < 0) {
4616 			verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n",
4617 				regno);
4618 			return -EACCES;
4619 		}
4620 
4621 		if (reg->umin_value == 0) {
4622 			err = check_helper_mem_access(env, regno - 1, 0,
4623 						      zero_size_allowed,
4624 						      meta);
4625 			if (err)
4626 				return err;
4627 		}
4628 
4629 		if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4630 			verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
4631 				regno);
4632 			return -EACCES;
4633 		}
4634 		err = check_helper_mem_access(env, regno - 1,
4635 					      reg->umax_value,
4636 					      zero_size_allowed, meta);
4637 		if (!err)
4638 			err = mark_chain_precision(env, regno);
4639 	} else if (arg_type_is_alloc_size(arg_type)) {
4640 		if (!tnum_is_const(reg->var_off)) {
4641 			verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
4642 				regno);
4643 			return -EACCES;
4644 		}
4645 		meta->mem_size = reg->var_off.value;
4646 	} else if (arg_type_is_int_ptr(arg_type)) {
4647 		int size = int_ptr_type_to_size(arg_type);
4648 
4649 		err = check_helper_mem_access(env, regno, size, false, meta);
4650 		if (err)
4651 			return err;
4652 		err = check_ptr_alignment(env, reg, 0, size, true);
4653 	}
4654 
4655 	return err;
4656 }
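
/* Illustrative sequence for the map_ptr/map_key pairing handled above
 * (a sketch; 'map_fd' stands for some map with an 8-byte key):
 *    BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),      // init key on the stack
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),      // r2 = fp - 8 (key)
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),           // r1 = CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * ARG_CONST_MAP_PTR (r1) records meta->map_ptr first, so the
 * ARG_PTR_TO_MAP_KEY check of r2 can verify [fp - 8, fp - 8 + key_size).
 */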
4657 
4658 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4659 {
4660 	enum bpf_attach_type eatype = env->prog->expected_attach_type;
4661 	enum bpf_prog_type type = resolve_prog_type(env->prog);
4662 
4663 	if (func_id != BPF_FUNC_map_update_elem)
4664 		return false;
4665 
4666 	/* It's not possible to get access to a locked struct sock in these
4667 	 * contexts, so updating is safe.
4668 	 */
4669 	switch (type) {
4670 	case BPF_PROG_TYPE_TRACING:
4671 		if (eatype == BPF_TRACE_ITER)
4672 			return true;
4673 		break;
4674 	case BPF_PROG_TYPE_SOCKET_FILTER:
4675 	case BPF_PROG_TYPE_SCHED_CLS:
4676 	case BPF_PROG_TYPE_SCHED_ACT:
4677 	case BPF_PROG_TYPE_XDP:
4678 	case BPF_PROG_TYPE_SK_REUSEPORT:
4679 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
4680 	case BPF_PROG_TYPE_SK_LOOKUP:
4681 		return true;
4682 	default:
4683 		break;
4684 	}
4685 
4686 	verbose(env, "cannot update sockmap in this context\n");
4687 	return false;
4688 }
4689 
4690 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4691 {
4692 	return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4693 }
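
/* Mixing tail calls with bpf-to-bpf calls needs JIT support for carrying
 * the tail call count across frames; at this point only the x86-64 JIT
 * implements that, hence the jit_requested && CONFIG_X86_64 check above.
 */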
4694 
4695 static int check_map_func_compatibility(struct bpf_verifier_env *env,
4696 					struct bpf_map *map, int func_id)
4697 {
4698 	if (!map)
4699 		return 0;
4700 
4701 	/* We need a two way check, first is from map perspective ... */
4702 	switch (map->map_type) {
4703 	case BPF_MAP_TYPE_PROG_ARRAY:
4704 		if (func_id != BPF_FUNC_tail_call)
4705 			goto error;
4706 		break;
4707 	case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4708 		if (func_id != BPF_FUNC_perf_event_read &&
4709 		    func_id != BPF_FUNC_perf_event_output &&
4710 		    func_id != BPF_FUNC_skb_output &&
4711 		    func_id != BPF_FUNC_perf_event_read_value &&
4712 		    func_id != BPF_FUNC_xdp_output)
4713 			goto error;
4714 		break;
4715 	case BPF_MAP_TYPE_RINGBUF:
4716 		if (func_id != BPF_FUNC_ringbuf_output &&
4717 		    func_id != BPF_FUNC_ringbuf_reserve &&
4718 		    func_id != BPF_FUNC_ringbuf_query)
4719 			goto error;
4720 		break;
4721 	case BPF_MAP_TYPE_STACK_TRACE:
4722 		if (func_id != BPF_FUNC_get_stackid)
4723 			goto error;
4724 		break;
4725 	case BPF_MAP_TYPE_CGROUP_ARRAY:
4726 		if (func_id != BPF_FUNC_skb_under_cgroup &&
4727 		    func_id != BPF_FUNC_current_task_under_cgroup)
4728 			goto error;
4729 		break;
4730 	case BPF_MAP_TYPE_CGROUP_STORAGE:
4731 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4732 		if (func_id != BPF_FUNC_get_local_storage)
4733 			goto error;
4734 		break;
4735 	case BPF_MAP_TYPE_DEVMAP:
4736 	case BPF_MAP_TYPE_DEVMAP_HASH:
4737 		if (func_id != BPF_FUNC_redirect_map &&
4738 		    func_id != BPF_FUNC_map_lookup_elem)
4739 			goto error;
4740 		break;
4741 	/* Restrict bpf side of cpumap and xskmap, open when use-cases
4742 	 * appear.
4743 	 */
4744 	case BPF_MAP_TYPE_CPUMAP:
4745 		if (func_id != BPF_FUNC_redirect_map)
4746 			goto error;
4747 		break;
4748 	case BPF_MAP_TYPE_XSKMAP:
4749 		if (func_id != BPF_FUNC_redirect_map &&
4750 		    func_id != BPF_FUNC_map_lookup_elem)
4751 			goto error;
4752 		break;
4753 	case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4754 	case BPF_MAP_TYPE_HASH_OF_MAPS:
4755 		if (func_id != BPF_FUNC_map_lookup_elem)
4756 			goto error;
4757 		break;
4758 	case BPF_MAP_TYPE_SOCKMAP:
4759 		if (func_id != BPF_FUNC_sk_redirect_map &&
4760 		    func_id != BPF_FUNC_sock_map_update &&
4761 		    func_id != BPF_FUNC_map_delete_elem &&
4762 		    func_id != BPF_FUNC_msg_redirect_map &&
4763 		    func_id != BPF_FUNC_sk_select_reuseport &&
4764 		    func_id != BPF_FUNC_map_lookup_elem &&
4765 		    !may_update_sockmap(env, func_id))
4766 			goto error;
4767 		break;
4768 	case BPF_MAP_TYPE_SOCKHASH:
4769 		if (func_id != BPF_FUNC_sk_redirect_hash &&
4770 		    func_id != BPF_FUNC_sock_hash_update &&
4771 		    func_id != BPF_FUNC_map_delete_elem &&
4772 		    func_id != BPF_FUNC_msg_redirect_hash &&
4773 		    func_id != BPF_FUNC_sk_select_reuseport &&
4774 		    func_id != BPF_FUNC_map_lookup_elem &&
4775 		    !may_update_sockmap(env, func_id))
4776 			goto error;
4777 		break;
4778 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4779 		if (func_id != BPF_FUNC_sk_select_reuseport)
4780 			goto error;
4781 		break;
4782 	case BPF_MAP_TYPE_QUEUE:
4783 	case BPF_MAP_TYPE_STACK:
4784 		if (func_id != BPF_FUNC_map_peek_elem &&
4785 		    func_id != BPF_FUNC_map_pop_elem &&
4786 		    func_id != BPF_FUNC_map_push_elem)
4787 			goto error;
4788 		break;
4789 	case BPF_MAP_TYPE_SK_STORAGE:
4790 		if (func_id != BPF_FUNC_sk_storage_get &&
4791 		    func_id != BPF_FUNC_sk_storage_delete)
4792 			goto error;
4793 		break;
4794 	case BPF_MAP_TYPE_INODE_STORAGE:
4795 		if (func_id != BPF_FUNC_inode_storage_get &&
4796 		    func_id != BPF_FUNC_inode_storage_delete)
4797 			goto error;
4798 		break;
4799 	default:
4800 		break;
4801 	}
4802 
4803 	/* ... and second from the function itself. */
4804 	switch (func_id) {
4805 	case BPF_FUNC_tail_call:
4806 		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
4807 			goto error;
4808 		if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4809 			verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4810 			return -EINVAL;
4811 		}
4812 		break;
4813 	case BPF_FUNC_perf_event_read:
4814 	case BPF_FUNC_perf_event_output:
4815 	case BPF_FUNC_perf_event_read_value:
4816 	case BPF_FUNC_skb_output:
4817 	case BPF_FUNC_xdp_output:
4818 		if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
4819 			goto error;
4820 		break;
4821 	case BPF_FUNC_ringbuf_output:
4822 	case BPF_FUNC_ringbuf_reserve:
4823 	case BPF_FUNC_ringbuf_query:
4824 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
4825 			goto error;
4826 		break;
4827 	case BPF_FUNC_get_stackid:
4828 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
4829 			goto error;
4830 		break;
4831 	case BPF_FUNC_current_task_under_cgroup:
4832 	case BPF_FUNC_skb_under_cgroup:
4833 		if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
4834 			goto error;
4835 		break;
4836 	case BPF_FUNC_redirect_map:
4837 		if (map->map_type != BPF_MAP_TYPE_DEVMAP &&
4838 		    map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4839 		    map->map_type != BPF_MAP_TYPE_CPUMAP &&
4840 		    map->map_type != BPF_MAP_TYPE_XSKMAP)
4841 			goto error;
4842 		break;
4843 	case BPF_FUNC_sk_redirect_map:
4844 	case BPF_FUNC_msg_redirect_map:
4845 	case BPF_FUNC_sock_map_update:
4846 		if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
4847 			goto error;
4848 		break;
4849 	case BPF_FUNC_sk_redirect_hash:
4850 	case BPF_FUNC_msg_redirect_hash:
4851 	case BPF_FUNC_sock_hash_update:
4852 		if (map->map_type != BPF_MAP_TYPE_SOCKHASH)
4853 			goto error;
4854 		break;
4855 	case BPF_FUNC_get_local_storage:
4856 		if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE &&
4857 		    map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
4858 			goto error;
4859 		break;
4860 	case BPF_FUNC_sk_select_reuseport:
4861 		if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY &&
4862 		    map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4863 		    map->map_type != BPF_MAP_TYPE_SOCKHASH)
4864 			goto error;
4865 		break;
4866 	case BPF_FUNC_map_peek_elem:
4867 	case BPF_FUNC_map_pop_elem:
4868 	case BPF_FUNC_map_push_elem:
4869 		if (map->map_type != BPF_MAP_TYPE_QUEUE &&
4870 		    map->map_type != BPF_MAP_TYPE_STACK)
4871 			goto error;
4872 		break;
4873 	case BPF_FUNC_sk_storage_get:
4874 	case BPF_FUNC_sk_storage_delete:
4875 		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
4876 			goto error;
4877 		break;
4878 	case BPF_FUNC_inode_storage_get:
4879 	case BPF_FUNC_inode_storage_delete:
4880 		if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
4881 			goto error;
4882 		break;
4883 	default:
4884 		break;
4885 	}
4886 
4887 	return 0;
4888 error:
4889 	verbose(env, "cannot pass map_type %d into func %s#%d\n",
4890 		map->map_type, func_id_name(func_id), func_id);
4891 	return -EINVAL;
4892 }
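
/* Example of a combination rejected above (sketch): calling
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call)
 * with r2 pointing to e.g. a BPF_MAP_TYPE_HASH map fails the second
 * switch with "cannot pass map_type %d into func %s#%d", since tail
 * calls only work on BPF_MAP_TYPE_PROG_ARRAY.
 */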
4893 
4894 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4895 {
4896 	int count = 0;
4897 
4898 	if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
4899 		count++;
4900 	if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
4901 		count++;
4902 	if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
4903 		count++;
4904 	if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
4905 		count++;
4906 	if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
4907 		count++;
4908 
4909 	/* We only support one arg being in raw mode at the moment,
4910 	 * which is sufficient for the helper functions we have
4911 	 * right now.
4912 	 */
4913 	return count <= 1;
4914 }
4915 
4916 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr,
4917 				    enum bpf_arg_type arg_next)
4918 {
4919 	return (arg_type_is_mem_ptr(arg_curr) &&
4920 	        !arg_type_is_mem_size(arg_next)) ||
4921 	       (!arg_type_is_mem_ptr(arg_curr) &&
4922 		arg_type_is_mem_size(arg_next));
4923 }
4924 
4925 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4926 {
4927 	/* bpf_xxx(..., buf, len) call will access 'len'
4928 	 * bytes from memory 'buf'. Both arg types need
4929 	 * to be paired, so make sure there's no buggy
4930 	 * helper function specification.
4931 	 */
4932 	if (arg_type_is_mem_size(fn->arg1_type) ||
4933 	    arg_type_is_mem_ptr(fn->arg5_type)  ||
4934 	    check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4935 	    check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4936 	    check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4937 	    check_args_pair_invalid(fn->arg4_type, fn->arg5_type))
4938 		return false;
4939 
4940 	return true;
4941 }
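
/* For instance, a proto that passes the pairing check above declares the
 * buffer and its length back to back, roughly (sketch):
 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *	.arg2_type = ARG_CONST_SIZE,
 * while a mem pointer followed by e.g. ARG_ANYTHING, or a size argument
 * with no preceding mem pointer, is rejected as a misconfigured helper.
 */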
4942 
4943 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4944 {
4945 	int count = 0;
4946 
4947 	if (arg_type_may_be_refcounted(fn->arg1_type))
4948 		count++;
4949 	if (arg_type_may_be_refcounted(fn->arg2_type))
4950 		count++;
4951 	if (arg_type_may_be_refcounted(fn->arg3_type))
4952 		count++;
4953 	if (arg_type_may_be_refcounted(fn->arg4_type))
4954 		count++;
4955 	if (arg_type_may_be_refcounted(fn->arg5_type))
4956 		count++;
4957 
4958 	/* A reference acquiring function cannot acquire
4959 	 * another refcounted ptr.
4960 	 */
4961 	if (may_be_acquire_function(func_id) && count)
4962 		return false;
4963 
4964 	/* We only support one arg being unreferenced at the moment,
4965 	 * which is sufficient for the helper functions we have right now.
4966 	 */
4967 	return count <= 1;
4968 }
4969 
4970 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4971 {
4972 	int i;
4973 
4974 	for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4975 		if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
4976 			return false;
4977 
4978 		if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
4979 			return false;
4980 	}
4981 
4982 	return true;
4983 }
4984 
4985 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4986 {
4987 	return check_raw_mode_ok(fn) &&
4988 	       check_arg_pair_ok(fn) &&
4989 	       check_btf_id_ok(fn) &&
4990 	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
4991 }
4992 
4993 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4994  * are now invalid, so turn them into unknown SCALAR_VALUE.
4995  */
4996 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4997 {
4998 	struct bpf_func_state *state;
4999 	struct bpf_reg_state *reg;
5000 
5001 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
5002 		if (reg_is_pkt_pointer_any(reg))
5003 			__mark_reg_unknown(env, reg);
5004 	}));
5005 }
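
/* e.g. helpers such as bpf_skb_pull_data() may reallocate packet data,
 * so after the call any previously derived PTR_TO_PACKET[_META,_END]
 * copies become scalars and the program has to reload data/data_end
 * before touching the packet again.
 */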
5006 
5007 enum {
5008 	AT_PKT_END = -1,
5009 	BEYOND_PKT_END = -2,
5010 };
5011 
5012 static void mark_pkt_end(struct bpf_verifier_state *vstate, int regn, bool range_open)
5013 {
5014 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5015 	struct bpf_reg_state *reg = &state->regs[regn];
5016 
5017 	if (reg->type != PTR_TO_PACKET)
5018 		/* PTR_TO_PACKET_META is not supported yet */
5019 		return;
5020 
5021 	/* The 'reg' is pkt > pkt_end or pkt >= pkt_end.
5022 	 * How far beyond pkt_end it goes is unknown.
5023 	 * if (!range_open) it's the case of pkt >= pkt_end
5024 	 * if (range_open) it's the case of pkt > pkt_end
5025 	 * hence this pointer is at least 1 byte bigger than pkt_end
5026 	 */
5027 	if (range_open)
5028 		reg->range = BEYOND_PKT_END;
5029 	else
5030 		reg->range = AT_PKT_END;
5031 }
5032 
5033 /* The pointer with the specified id has released its reference to kernel
5034  * resources. Identify all copies of the same pointer and clear the reference.
5035  */
5036 static int release_reference(struct bpf_verifier_env *env,
5037 			     int ref_obj_id)
5038 {
5039 	struct bpf_func_state *state;
5040 	struct bpf_reg_state *reg;
5041 	int err;
5042 
5043 	err = release_reference_state(cur_func(env), ref_obj_id);
5044 	if (err)
5045 		return err;
5046 
5047 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
5048 		if (reg->ref_obj_id == ref_obj_id) {
5049 			if (!env->allow_ptr_leaks)
5050 				__mark_reg_not_init(env, reg);
5051 			else
5052 				__mark_reg_unknown(env, reg);
5053 		}
5054 	}));
5055 
5056 	return 0;
5057 }
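
/* e.g. with r0 = bpf_sk_lookup_tcp(...) acquiring ref_obj_id N and the
 * program then copying it via r6 = r0, a later bpf_sk_release(r6) clears
 * both r0 and r6 here, so neither copy can be dereferenced afterwards.
 */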
5058 
5059 static void clear_caller_saved_regs(struct bpf_verifier_env *env,
5060 				    struct bpf_reg_state *regs)
5061 {
5062 	int i;
5063 
5064 	/* after the call registers r0 - r5 were scratched */
5065 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
5066 		mark_reg_not_init(env, regs, caller_saved[i]);
5067 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5068 	}
5069 }
5070 
5071 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
5072 			   int *insn_idx)
5073 {
5074 	struct bpf_verifier_state *state = env->cur_state;
5075 	struct bpf_func_info_aux *func_info_aux;
5076 	struct bpf_func_state *caller, *callee;
5077 	int i, err, subprog, target_insn;
5078 	bool is_global = false;
5079 
5080 	if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5081 		verbose(env, "the call stack of %d frames is too deep\n",
5082 			state->curframe + 2);
5083 		return -E2BIG;
5084 	}
5085 
5086 	target_insn = *insn_idx + insn->imm;
5087 	subprog = find_subprog(env, target_insn + 1);
5088 	if (subprog < 0) {
5089 		verbose(env, "verifier bug. No program starts at insn %d\n",
5090 			target_insn + 1);
5091 		return -EFAULT;
5092 	}
5093 
5094 	caller = state->frame[state->curframe];
5095 	if (state->frame[state->curframe + 1]) {
5096 		verbose(env, "verifier bug. Frame %d already allocated\n",
5097 			state->curframe + 1);
5098 		return -EFAULT;
5099 	}
5100 
5101 	func_info_aux = env->prog->aux->func_info_aux;
5102 	if (func_info_aux)
5103 		is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5104 	err = btf_check_func_arg_match(env, subprog, caller->regs);
5105 	if (err == -EFAULT)
5106 		return err;
5107 	if (is_global) {
5108 		if (err) {
5109 			verbose(env, "Caller passes invalid args into func#%d\n",
5110 				subprog);
5111 			return err;
5112 		} else {
5113 			if (env->log.level & BPF_LOG_LEVEL)
5114 				verbose(env,
5115 					"Func#%d is global and valid. Skipping.\n",
5116 					subprog);
5117 			clear_caller_saved_regs(env, caller->regs);
5118 
5119 			/* All global functions return a 64-bit SCALAR_VALUE */
5120 			mark_reg_unknown(env, caller->regs, BPF_REG_0);
5121 			caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5122 
5123 			/* continue with next insn after call */
5124 			return 0;
5125 		}
5126 	}
5127 
5128 	callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5129 	if (!callee)
5130 		return -ENOMEM;
5131 	state->frame[state->curframe + 1] = callee;
5132 
5133 	/* callee cannot access r0, r6 - r9 for reading and has to write
5134 	 * into its own stack before reading from it.
5135 	 * callee can read/write into caller's stack
5136 	 */
5137 	init_func_state(env, callee,
5138 			/* remember the callsite, it will be used by bpf_exit */
5139 			*insn_idx /* callsite */,
5140 			state->curframe + 1 /* frameno within this callchain */,
5141 			subprog /* subprog number within this prog */);
5142 
5143 	/* Transfer references to the callee */
5144 	err = transfer_reference_state(callee, caller);
5145 	if (err)
5146 		return err;
5147 
5148 	/* copy r1 - r5 args that callee can access.  The copy includes parent
5149 	 * pointers, which connects us up to the liveness chain
5150 	 */
5151 	for (i = BPF_REG_1; i <= BPF_REG_5; i++)
5152 		callee->regs[i] = caller->regs[i];
5153 
5154 	clear_caller_saved_regs(env, caller->regs);
5155 
5156 	/* only increment it after check_reg_arg() finished */
5157 	state->curframe++;
5158 
5159 	/* and go analyze first insn of the callee */
5160 	*insn_idx = target_insn;
5161 
5162 	if (env->log.level & BPF_LOG_LEVEL) {
5163 		verbose(env, "caller:\n");
5164 		print_verifier_state(env, caller);
5165 		verbose(env, "callee:\n");
5166 		print_verifier_state(env, callee);
5167 	}
5168 	return 0;
5169 }
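
/* A minimal bpf-to-bpf call handled above could look like (sketch):
 *    0: BPF_MOV64_IMM(BPF_REG_1, 1),
 *    1: BPF_CALL_REL(1),                       // call subprog at insn 3
 *    2: BPF_EXIT_INSN(),
 *    3: BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),   // subprog: r0 = r1 + 1
 *    4: BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
 *    5: BPF_EXIT_INSN(),
 * The callee gets a fresh frame with a copy of r1-r5, the caller's
 * r0-r5 are scratched, and verification continues at insn 3.
 */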
5170 
5171 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5172 {
5173 	struct bpf_verifier_state *state = env->cur_state;
5174 	struct bpf_func_state *caller, *callee;
5175 	struct bpf_reg_state *r0;
5176 	int err;
5177 
5178 	callee = state->frame[state->curframe];
5179 	r0 = &callee->regs[BPF_REG_0];
5180 	if (r0->type == PTR_TO_STACK) {
5181 		/* technically it's ok to return caller's stack pointer
5182 		 * (or caller's caller's pointer) back to the caller,
5183 		 * since these pointers are valid. Only current stack
5184 		 * pointer will be invalid as soon as function exits,
5185 		 * but let's be conservative
5186 		 */
5187 		verbose(env, "cannot return stack pointer to the caller\n");
5188 		return -EINVAL;
5189 	}
5190 
5191 	state->curframe--;
5192 	caller = state->frame[state->curframe];
5193 	/* return to the caller whatever r0 had in the callee */
5194 	caller->regs[BPF_REG_0] = *r0;
5195 
5196 	/* Transfer references to the caller */
5197 	err = transfer_reference_state(caller, callee);
5198 	if (err)
5199 		return err;
5200 
5201 	*insn_idx = callee->callsite + 1;
5202 	if (env->log.level & BPF_LOG_LEVEL) {
5203 		verbose(env, "returning from callee:\n");
5204 		print_verifier_state(env, callee);
5205 		verbose(env, "to caller at %d:\n", *insn_idx);
5206 		print_verifier_state(env, caller);
5207 	}
5208 	/* clear everything in the callee */
5209 	free_func_state(callee);
5210 	state->frame[state->curframe + 1] = NULL;
5211 	return 0;
5212 }
5213 
5214 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type,
5215 				   int func_id,
5216 				   struct bpf_call_arg_meta *meta)
5217 {
5218 	struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5219 
5220 	if (ret_type != RET_INTEGER ||
5221 	    (func_id != BPF_FUNC_get_stack &&
5222 	     func_id != BPF_FUNC_probe_read_str &&
5223 	     func_id != BPF_FUNC_probe_read_kernel_str &&
5224 	     func_id != BPF_FUNC_probe_read_user_str))
5225 		return;
5226 
5227 	ret_reg->smax_value = meta->msize_max_value;
5228 	ret_reg->s32_max_value = meta->msize_max_value;
5229 	ret_reg->smin_value = -MAX_ERRNO;
5230 	ret_reg->s32_min_value = -MAX_ERRNO;
5231 	reg_bounds_sync(ret_reg);
5232 }
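
/* e.g. after r0 = bpf_probe_read_str(buf, 64, unsafe_ptr) with a constant
 * size of 64, meta->msize_max_value is 64, so r0 ends up bounded to
 * [-MAX_ERRNO, 64] instead of being a fully unknown scalar.
 */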
5233 
5234 static int
5235 record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5236 		int func_id, int insn_idx)
5237 {
5238 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5239 	struct bpf_map *map = meta->map_ptr;
5240 
5241 	if (func_id != BPF_FUNC_tail_call &&
5242 	    func_id != BPF_FUNC_map_lookup_elem &&
5243 	    func_id != BPF_FUNC_map_update_elem &&
5244 	    func_id != BPF_FUNC_map_delete_elem &&
5245 	    func_id != BPF_FUNC_map_push_elem &&
5246 	    func_id != BPF_FUNC_map_pop_elem &&
5247 	    func_id != BPF_FUNC_map_peek_elem)
5248 		return 0;
5249 
5250 	if (map == NULL) {
5251 		verbose(env, "kernel subsystem misconfigured verifier\n");
5252 		return -EINVAL;
5253 	}
5254 
5255 	/* In case of read-only, some additional restrictions
5256 	 * need to be applied in order to prevent altering the
5257 	 * state of the map from program side.
5258 	 */
5259 	if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5260 	    (func_id == BPF_FUNC_map_delete_elem ||
5261 	     func_id == BPF_FUNC_map_update_elem ||
5262 	     func_id == BPF_FUNC_map_push_elem ||
5263 	     func_id == BPF_FUNC_map_pop_elem)) {
5264 		verbose(env, "write into map forbidden\n");
5265 		return -EACCES;
5266 	}
5267 
5268 	if (!BPF_MAP_PTR(aux->map_ptr_state))
5269 		bpf_map_ptr_store(aux, meta->map_ptr,
5270 				  !meta->map_ptr->bypass_spec_v1);
5271 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
5272 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
5273 				  !meta->map_ptr->bypass_spec_v1);
5274 	return 0;
5275 }
5276 
5277 static int
5278 record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
5279 		int func_id, int insn_idx)
5280 {
5281 	struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5282 	struct bpf_reg_state *regs = cur_regs(env), *reg;
5283 	struct bpf_map *map = meta->map_ptr;
5284 	u64 val, max;
5285 	int err;
5286 
5287 	if (func_id != BPF_FUNC_tail_call)
5288 		return 0;
5289 	if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5290 		verbose(env, "kernel subsystem misconfigured verifier\n");
5291 		return -EINVAL;
5292 	}
5293 
5294 	reg = &regs[BPF_REG_3];
5295 	val = reg->var_off.value;
5296 	max = map->max_entries;
5297 
5298 	if (!(register_is_const(reg) && val < max)) {
5299 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5300 		return 0;
5301 	}
5302 
5303 	err = mark_chain_precision(env, BPF_REG_3);
5304 	if (err)
5305 		return err;
5306 	if (bpf_map_key_unseen(aux))
5307 		bpf_map_key_store(aux, val);
5308 	else if (!bpf_map_key_poisoned(aux) &&
5309 		  bpf_map_key_immediate(aux) != val)
5310 		bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5311 	return 0;
5312 }
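
/* If the tail call index in r3 is the same known constant (below
 * max_entries) on every path reaching this insn, the recorded key later
 * lets the tail call be turned into a direct jump; any mismatch or
 * non-constant index poisons the key and keeps the indirect path.
 */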
5313 
5314 static int check_reference_leak(struct bpf_verifier_env *env)
5315 {
5316 	struct bpf_func_state *state = cur_func(env);
5317 	int i;
5318 
5319 	for (i = 0; i < state->acquired_refs; i++) {
5320 		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
5321 			state->refs[i].id, state->refs[i].insn_idx);
5322 	}
5323 	return state->acquired_refs ? -EINVAL : 0;
5324 }
5325 
5326 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5327 {
5328 	const struct bpf_func_proto *fn = NULL;
5329 	struct bpf_reg_state *regs;
5330 	struct bpf_call_arg_meta meta;
5331 	bool changes_data;
5332 	int i, err;
5333 
5334 	/* find function prototype */
5335 	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5336 		verbose(env, "invalid func %s#%d\n", func_id_name(func_id),
5337 			func_id);
5338 		return -EINVAL;
5339 	}
5340 
5341 	if (env->ops->get_func_proto)
5342 		fn = env->ops->get_func_proto(func_id, env->prog);
5343 	if (!fn) {
5344 		verbose(env, "unknown func %s#%d\n", func_id_name(func_id),
5345 			func_id);
5346 		return -EINVAL;
5347 	}
5348 
5349 	/* eBPF programs must be GPL compatible to use GPL-ed functions */
5350 	if (!env->prog->gpl_compatible && fn->gpl_only) {
5351 		verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5352 		return -EINVAL;
5353 	}
5354 
5355 	if (fn->allowed && !fn->allowed(env->prog)) {
5356 		verbose(env, "helper call is not allowed in probe\n");
5357 		return -EINVAL;
5358 	}
5359 
5360 	/* With LD_ABS/IND some JITs save/restore skb from r1. */
5361 	changes_data = bpf_helper_changes_pkt_data(fn->func);
5362 	if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5363 		verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n",
5364 			func_id_name(func_id), func_id);
5365 		return -EINVAL;
5366 	}
5367 
5368 	memset(&meta, 0, sizeof(meta));
5369 	meta.pkt_access = fn->pkt_access;
5370 
5371 	err = check_func_proto(fn, func_id);
5372 	if (err) {
5373 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
5374 			func_id_name(func_id), func_id);
5375 		return err;
5376 	}
5377 
5378 	meta.func_id = func_id;
5379 	/* check args */
5380 	for (i = 0; i < 5; i++) {
5381 		err = check_func_arg(env, i, &meta, fn);
5382 		if (err)
5383 			return err;
5384 	}
5385 
5386 	err = record_func_map(env, &meta, func_id, insn_idx);
5387 	if (err)
5388 		return err;
5389 
5390 	err = record_func_key(env, &meta, func_id, insn_idx);
5391 	if (err)
5392 		return err;
5393 
5394 	/* Mark slots with STACK_MISC in case of raw mode, stack offset
5395 	 * is inferred from register state.
5396 	 */
5397 	for (i = 0; i < meta.access_size; i++) {
5398 		err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B,
5399 				       BPF_WRITE, -1, false);
5400 		if (err)
5401 			return err;
5402 	}
5403 
5404 	if (func_id == BPF_FUNC_tail_call) {
5405 		err = check_reference_leak(env);
5406 		if (err) {
5407 			verbose(env, "tail_call would lead to reference leak\n");
5408 			return err;
5409 		}
5410 	} else if (is_release_function(func_id)) {
5411 		err = release_reference(env, meta.ref_obj_id);
5412 		if (err) {
5413 			verbose(env, "func %s#%d reference has not been acquired before\n",
5414 				func_id_name(func_id), func_id);
5415 			return err;
5416 		}
5417 	}
5418 
5419 	regs = cur_regs(env);
5420 
5421 	/* check that flags argument in get_local_storage(map, flags) is 0,
5422 	 * this is required because get_local_storage() can't return an error.
5423 	 */
5424 	if (func_id == BPF_FUNC_get_local_storage &&
5425 	    !register_is_null(&regs[BPF_REG_2])) {
5426 		verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5427 		return -EINVAL;
5428 	}
5429 
5430 	/* reset caller saved regs */
5431 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
5432 		mark_reg_not_init(env, regs, caller_saved[i]);
5433 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5434 	}
5435 
5436 	/* helper call returns 64-bit value. */
5437 	regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5438 
5439 	/* update return register (already marked as written above) */
5440 	if (fn->ret_type == RET_INTEGER) {
5441 		/* sets type to SCALAR_VALUE */
5442 		mark_reg_unknown(env, regs, BPF_REG_0);
5443 	} else if (fn->ret_type == RET_VOID) {
5444 		regs[BPF_REG_0].type = NOT_INIT;
5445 	} else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL ||
5446 		   fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5447 		/* There is no offset yet applied, variable or fixed */
5448 		mark_reg_known_zero(env, regs, BPF_REG_0);
5449 		/* remember map_ptr, so that check_map_access()
5450 		 * can check 'value_size' boundary of memory access
5451 		 * to map element returned from bpf_map_lookup_elem()
5452 		 */
5453 		if (meta.map_ptr == NULL) {
5454 			verbose(env,
5455 				"kernel subsystem misconfigured verifier\n");
5456 			return -EINVAL;
5457 		}
5458 		regs[BPF_REG_0].map_ptr = meta.map_ptr;
5459 		if (fn->ret_type == RET_PTR_TO_MAP_VALUE) {
5460 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE;
5461 			if (map_value_has_spin_lock(meta.map_ptr))
5462 				regs[BPF_REG_0].id = ++env->id_gen;
5463 		} else {
5464 			regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
5465 		}
5466 	} else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) {
5467 		mark_reg_known_zero(env, regs, BPF_REG_0);
5468 		regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL;
5469 	} else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) {
5470 		mark_reg_known_zero(env, regs, BPF_REG_0);
5471 		regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL;
5472 	} else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) {
5473 		mark_reg_known_zero(env, regs, BPF_REG_0);
5474 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
5475 	} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
5476 		mark_reg_known_zero(env, regs, BPF_REG_0);
5477 		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
5478 		regs[BPF_REG_0].mem_size = meta.mem_size;
5479 	} else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL ||
5480 		   fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) {
5481 		const struct btf_type *t;
5482 
5483 		mark_reg_known_zero(env, regs, BPF_REG_0);
5484 		t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5485 		if (!btf_type_is_struct(t)) {
5486 			u32 tsize;
5487 			const struct btf_type *ret;
5488 			const char *tname;
5489 
5490 			/* resolve the type size of ksym. */
5491 			ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5492 			if (IS_ERR(ret)) {
5493 				tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5494 				verbose(env, "unable to resolve the size of type '%s': %ld\n",
5495 					tname, PTR_ERR(ret));
5496 				return -EINVAL;
5497 			}
5498 			regs[BPF_REG_0].type =
5499 				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5500 				PTR_TO_MEM : PTR_TO_MEM_OR_NULL;
5501 			regs[BPF_REG_0].mem_size = tsize;
5502 		} else {
5503 			regs[BPF_REG_0].type =
5504 				fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ?
5505 				PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL;
5506 			regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5507 		}
5508 	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
5509 		int ret_btf_id;
5510 
5511 		mark_reg_known_zero(env, regs, BPF_REG_0);
5512 		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
5513 		ret_btf_id = *fn->ret_btf_id;
5514 		if (ret_btf_id == 0) {
5515 			verbose(env, "invalid return type %d of func %s#%d\n",
5516 				fn->ret_type, func_id_name(func_id), func_id);
5517 			return -EINVAL;
5518 		}
5519 		regs[BPF_REG_0].btf_id = ret_btf_id;
5520 	} else {
5521 		verbose(env, "unknown return type %d of func %s#%d\n",
5522 			fn->ret_type, func_id_name(func_id), func_id);
5523 		return -EINVAL;
5524 	}
5525 
5526 	if (reg_type_may_be_null(regs[BPF_REG_0].type))
5527 		regs[BPF_REG_0].id = ++env->id_gen;
5528 
5529 	if (is_ptr_cast_function(func_id)) {
5530 		/* For release_reference() */
5531 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5532 	} else if (is_acquire_function(func_id, meta.map_ptr)) {
5533 		int id = acquire_reference_state(env, insn_idx);
5534 
5535 		if (id < 0)
5536 			return id;
5537 		/* For mark_ptr_or_null_reg() */
5538 		regs[BPF_REG_0].id = id;
5539 		/* For release_reference() */
5540 		regs[BPF_REG_0].ref_obj_id = id;
5541 	}
5542 
5543 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5544 
5545 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5546 	if (err)
5547 		return err;
5548 
5549 	if ((func_id == BPF_FUNC_get_stack ||
5550 	     func_id == BPF_FUNC_get_task_stack) &&
5551 	    !env->prog->has_callchain_buf) {
5552 		const char *err_str;
5553 
5554 #ifdef CONFIG_PERF_EVENTS
5555 		err = get_callchain_buffers(sysctl_perf_event_max_stack);
5556 		err_str = "cannot get callchain buffer for func %s#%d\n";
5557 #else
5558 		err = -ENOTSUPP;
5559 		err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5560 #endif
5561 		if (err) {
5562 			verbose(env, err_str, func_id_name(func_id), func_id);
5563 			return err;
5564 		}
5565 
5566 		env->prog->has_callchain_buf = true;
5567 	}
5568 
5569 	if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack)
5570 		env->prog->call_get_stack = true;
5571 
5572 	if (changes_data)
5573 		clear_all_pkt_pointers(env);
5574 	return 0;
5575 }
5576 
5577 static bool signed_add_overflows(s64 a, s64 b)
5578 {
5579 	/* Do the add in u64, where overflow is well-defined */
5580 	s64 res = (s64)((u64)a + (u64)b);
5581 
5582 	if (b < 0)
5583 		return res > a;
5584 	return res < a;
5585 }
5586 
5587 static bool signed_add32_overflows(s32 a, s32 b)
5588 {
5589 	/* Do the add in u32, where overflow is well-defined */
5590 	s32 res = (s32)((u32)a + (u32)b);
5591 
5592 	if (b < 0)
5593 		return res > a;
5594 	return res < a;
5595 }
5596 
5597 static bool signed_sub_overflows(s64 a, s64 b)
5598 {
5599 	/* Do the sub in u64, where overflow is well-defined */
5600 	s64 res = (s64)((u64)a - (u64)b);
5601 
5602 	if (b < 0)
5603 		return res < a;
5604 	return res > a;
5605 }
5606 
5607 static bool signed_sub32_overflows(s32 a, s32 b)
5608 {
5609 	/* Do the sub in u32, where overflow is well-defined */
5610 	s32 res = (s32)((u32)a - (u32)b);
5611 
5612 	if (b < 0)
5613 		return res < a;
5614 	return res > a;
5615 }
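
/* Worked examples: signed_add_overflows(S64_MAX, 1) is true since the
 * wrapped result S64_MIN is smaller than S64_MAX, and
 * signed_sub_overflows(S64_MIN, 1) is true since the wrapped result
 * S64_MAX is larger than S64_MIN. The 32-bit variants behave the same
 * way on S32_MIN/S32_MAX.
 */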
5616 
5617 static bool check_reg_sane_offset(struct bpf_verifier_env *env,
5618 				  const struct bpf_reg_state *reg,
5619 				  enum bpf_reg_type type)
5620 {
5621 	bool known = tnum_is_const(reg->var_off);
5622 	s64 val = reg->var_off.value;
5623 	s64 smin = reg->smin_value;
5624 
5625 	if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5626 		verbose(env, "math between %s pointer and %lld is not allowed\n",
5627 			reg_type_str[type], val);
5628 		return false;
5629 	}
5630 
5631 	if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5632 		verbose(env, "%s pointer offset %d is not allowed\n",
5633 			reg_type_str[type], reg->off);
5634 		return false;
5635 	}
5636 
5637 	if (smin == S64_MIN) {
5638 		verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5639 			reg_type_str[type]);
5640 		return false;
5641 	}
5642 
5643 	if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5644 		verbose(env, "value %lld makes %s pointer be out of bounds\n",
5645 			smin, reg_type_str[type]);
5646 		return false;
5647 	}
5648 
5649 	return true;
5650 }
5651 
5652 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5653 {
5654 	return &env->insn_aux_data[env->insn_idx];
5655 }
5656 
5657 enum {
5658 	REASON_BOUNDS	= -1,
5659 	REASON_TYPE	= -2,
5660 	REASON_PATHS	= -3,
5661 	REASON_LIMIT	= -4,
5662 	REASON_STACK	= -5,
5663 };
5664 
5665 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
5666 			      u32 *alu_limit, bool mask_to_left)
5667 {
5668 	u32 max = 0, ptr_limit = 0;
5669 
5670 	switch (ptr_reg->type) {
5671 	case PTR_TO_STACK:
5672 		/* Offset 0 is out-of-bounds, but acceptable start for the
5673 		 * left direction, see BPF_REG_FP. Also, unknown scalar
5674 		 * offset where we would need to deal with min/max bounds is
5675 		 * currently prohibited for unprivileged.
5676 		 */
5677 		max = MAX_BPF_STACK + mask_to_left;
5678 		ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5679 		break;
5680 	case PTR_TO_MAP_VALUE:
5681 		max = ptr_reg->map_ptr->value_size;
5682 		ptr_limit = (mask_to_left ?
5683 			     ptr_reg->smin_value :
5684 			     ptr_reg->umax_value) + ptr_reg->off;
5685 		break;
5686 	default:
5687 		return REASON_TYPE;
5688 	}
5689 
5690 	if (ptr_limit >= max)
5691 		return REASON_LIMIT;
5692 	*alu_limit = ptr_limit;
5693 	return 0;
5694 }
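
/* e.g. for a PTR_TO_STACK with a constant offset of -16 from the frame
 * pointer, ptr_limit is 16: the masking emitted later clamps any scalar
 * added to/subtracted from that pointer so that even a mispredicted
 * (speculative) path cannot move it past this limit.
 */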
5695 
5696 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
5697 				    const struct bpf_insn *insn)
5698 {
5699 	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5700 }
5701 
5702 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
5703 				       u32 alu_state, u32 alu_limit)
5704 {
5705 	/* If we arrived here from different branches with different
5706 	 * state or limits to sanitize, then this won't work.
5707 	 */
5708 	if (aux->alu_state &&
5709 	    (aux->alu_state != alu_state ||
5710 	     aux->alu_limit != alu_limit))
5711 		return REASON_PATHS;
5712 
5713 	/* Corresponding fixup done in fixup_bpf_calls(). */
5714 	aux->alu_state = alu_state;
5715 	aux->alu_limit = alu_limit;
5716 	return 0;
5717 }
5718 
5719 static int sanitize_val_alu(struct bpf_verifier_env *env,
5720 			    struct bpf_insn *insn)
5721 {
5722 	struct bpf_insn_aux_data *aux = cur_aux(env);
5723 
5724 	if (can_skip_alu_sanitation(env, insn))
5725 		return 0;
5726 
5727 	return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5728 }
5729 
5730 static bool sanitize_needed(u8 opcode)
5731 {
5732 	return opcode == BPF_ADD || opcode == BPF_SUB;
5733 }
5734 
5735 struct bpf_sanitize_info {
5736 	struct bpf_insn_aux_data aux;
5737 	bool mask_to_left;
5738 };
5739 
5740 static struct bpf_verifier_state *
5741 sanitize_speculative_path(struct bpf_verifier_env *env,
5742 			  const struct bpf_insn *insn,
5743 			  u32 next_idx, u32 curr_idx)
5744 {
5745 	struct bpf_verifier_state *branch;
5746 	struct bpf_reg_state *regs;
5747 
5748 	branch = push_stack(env, next_idx, curr_idx, true);
5749 	if (branch && insn) {
5750 		regs = branch->frame[branch->curframe]->regs;
5751 		if (BPF_SRC(insn->code) == BPF_K) {
5752 			mark_reg_unknown(env, regs, insn->dst_reg);
5753 		} else if (BPF_SRC(insn->code) == BPF_X) {
5754 			mark_reg_unknown(env, regs, insn->dst_reg);
5755 			mark_reg_unknown(env, regs, insn->src_reg);
5756 		}
5757 	}
5758 	return branch;
5759 }
5760 
5761 static int sanitize_ptr_alu(struct bpf_verifier_env *env,
5762 			    struct bpf_insn *insn,
5763 			    const struct bpf_reg_state *ptr_reg,
5764 			    const struct bpf_reg_state *off_reg,
5765 			    struct bpf_reg_state *dst_reg,
5766 			    struct bpf_sanitize_info *info,
5767 			    const bool commit_window)
5768 {
5769 	struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5770 	struct bpf_verifier_state *vstate = env->cur_state;
5771 	bool off_is_imm = tnum_is_const(off_reg->var_off);
5772 	bool off_is_neg = off_reg->smin_value < 0;
5773 	bool ptr_is_dst_reg = ptr_reg == dst_reg;
5774 	u8 opcode = BPF_OP(insn->code);
5775 	u32 alu_state, alu_limit;
5776 	struct bpf_reg_state tmp;
5777 	bool ret;
5778 	int err;
5779 
5780 	if (can_skip_alu_sanitation(env, insn))
5781 		return 0;
5782 
5783 	/* We already marked aux for masking from non-speculative
5784 	 * paths, thus we got here in the first place. We only care
5785 	 * to explore bad access from here.
5786 	 */
5787 	if (vstate->speculative)
5788 		goto do_sim;
5789 
5790 	if (!commit_window) {
5791 		if (!tnum_is_const(off_reg->var_off) &&
5792 		    (off_reg->smin_value < 0) != (off_reg->smax_value < 0))
5793 			return REASON_BOUNDS;
5794 
5795 		info->mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
5796 				     (opcode == BPF_SUB && !off_is_neg);
5797 	}
5798 
5799 	err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5800 	if (err < 0)
5801 		return err;
5802 
5803 	if (commit_window) {
5804 		/* In commit phase we narrow the masking window based on
5805 		 * the observed pointer move after the simulated operation.
5806 		 */
5807 		alu_state = info->aux.alu_state;
5808 		alu_limit = abs(info->aux.alu_limit - alu_limit);
5809 	} else {
5810 		alu_state  = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5811 		alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5812 		alu_state |= ptr_is_dst_reg ?
5813 			     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5814 
5815 		/* Limit pruning on unknown scalars to enable deep search for
5816 		 * potential masking differences from other program paths.
5817 		 */
5818 		if (!off_is_imm)
5819 			env->explore_alu_limits = true;
5820 	}
5821 
5822 	err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5823 	if (err < 0)
5824 		return err;
5825 do_sim:
5826 	/* If we're in commit phase, we're done here given we already
5827 	 * pushed the truncated dst_reg into the speculative verification
5828 	 * stack.
5829 	 *
5830 	 * Also, when register is a known constant, we rewrite register-based
5831 	 * operation to immediate-based, and thus do not need masking (and as
5832 	 * a consequence, do not need to simulate the zero-truncation either).
5833 	 */
5834 	if (commit_window || off_is_imm)
5835 		return 0;
5836 
5837 	/* Simulate and find potential out-of-bounds access under
5838 	 * speculative execution from truncation as a result of
5839 	 * masking when off was not within expected range. If off
5840 	 * sits in dst, then we temporarily need to move ptr there
5841 	 * to simulate dst (== 0) +/-= ptr. Needed, for example,
5842 	 * for cases where we use K-based arithmetic in one direction
5843 	 * and truncated reg-based in the other in order to explore
5844 	 * bad access.
5845 	 */
5846 	if (!ptr_is_dst_reg) {
5847 		tmp = *dst_reg;
5848 		*dst_reg = *ptr_reg;
5849 	}
5850 	ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1,
5851 					env->insn_idx);
5852 	if (!ptr_is_dst_reg && ret)
5853 		*dst_reg = tmp;
5854 	return !ret ? REASON_STACK : 0;
5855 }
5856 
5857 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5858 {
5859 	struct bpf_verifier_state *vstate = env->cur_state;
5860 
5861 	/* If we simulate paths under speculation, we don't update the
5862 	 * insn as 'seen' such that when we verify unreachable paths in
5863 	 * the non-speculative domain, sanitize_dead_code() can still
5864 	 * rewrite/sanitize them.
5865 	 */
5866 	if (!vstate->speculative)
5867 		env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5868 }
5869 
5870 static int sanitize_err(struct bpf_verifier_env *env,
5871 			const struct bpf_insn *insn, int reason,
5872 			const struct bpf_reg_state *off_reg,
5873 			const struct bpf_reg_state *dst_reg)
5874 {
5875 	static const char *err = "pointer arithmetic with it prohibited for !root";
5876 	const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5877 	u32 dst = insn->dst_reg, src = insn->src_reg;
5878 
5879 	switch (reason) {
5880 	case REASON_BOUNDS:
5881 		verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n",
5882 			off_reg == dst_reg ? dst : src, err);
5883 		break;
5884 	case REASON_TYPE:
5885 		verbose(env, "R%d has pointer with unsupported alu operation, %s\n",
5886 			off_reg == dst_reg ? src : dst, err);
5887 		break;
5888 	case REASON_PATHS:
5889 		verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n",
5890 			dst, op, err);
5891 		break;
5892 	case REASON_LIMIT:
5893 		verbose(env, "R%d tried to %s beyond pointer bounds, %s\n",
5894 			dst, op, err);
5895 		break;
5896 	case REASON_STACK:
5897 		verbose(env, "R%d could not be pushed for speculative verification, %s\n",
5898 			dst, err);
5899 		break;
5900 	default:
5901 		verbose(env, "verifier internal error: unknown reason (%d)\n",
5902 			reason);
5903 		break;
5904 	}
5905 
5906 	return -EACCES;
5907 }
5908 
5909 /* check that stack access falls within stack limits and that 'reg' doesn't
5910  * have a variable offset.
5911  *
5912  * Variable offset is prohibited for unprivileged mode for simplicity since it
5913  * requires corresponding support in Spectre masking for stack ALU.  See also
5914  * retrieve_ptr_limit().
5915  *
5917  * 'off' includes 'reg->off'.
5918  */
5919 static int check_stack_access_for_ptr_arithmetic(
5920 				struct bpf_verifier_env *env,
5921 				int regno,
5922 				const struct bpf_reg_state *reg,
5923 				int off)
5924 {
5925 	if (!tnum_is_const(reg->var_off)) {
5926 		char tn_buf[48];
5927 
5928 		tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5929 		verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n",
5930 			regno, tn_buf, off);
5931 		return -EACCES;
5932 	}
5933 
5934 	if (off >= 0 || off < -MAX_BPF_STACK) {
5935 		verbose(env, "R%d stack pointer arithmetic goes out of range, "
5936 			"prohibited for !root; off=%d\n", regno, off);
5937 		return -EACCES;
5938 	}
5939 
5940 	return 0;
5941 }
5942 
5943 static int sanitize_check_bounds(struct bpf_verifier_env *env,
5944 				 const struct bpf_insn *insn,
5945 				 const struct bpf_reg_state *dst_reg)
5946 {
5947 	u32 dst = insn->dst_reg;
5948 
5949 	/* For unprivileged we require that resulting offset must be in bounds
5950 	 * in order to be able to sanitize access later on.
5951 	 */
5952 	if (env->bypass_spec_v1)
5953 		return 0;
5954 
5955 	switch (dst_reg->type) {
5956 	case PTR_TO_STACK:
5957 		if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg,
5958 					dst_reg->off + dst_reg->var_off.value))
5959 			return -EACCES;
5960 		break;
5961 	case PTR_TO_MAP_VALUE:
5962 		if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5963 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
5964 				"prohibited for !root\n", dst);
5965 			return -EACCES;
5966 		}
5967 		break;
5968 	default:
5969 		break;
5970 	}
5971 
5972 	return 0;
5973 }
5974 
5975 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5976  * Caller should also handle BPF_MOV case separately.
5977  * If we return -EACCES, caller may want to try again treating pointer as a
5978  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
5979  */
5980 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
5981 				   struct bpf_insn *insn,
5982 				   const struct bpf_reg_state *ptr_reg,
5983 				   const struct bpf_reg_state *off_reg)
5984 {
5985 	struct bpf_verifier_state *vstate = env->cur_state;
5986 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
5987 	struct bpf_reg_state *regs = state->regs, *dst_reg;
5988 	bool known = tnum_is_const(off_reg->var_off);
5989 	s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value,
5990 	    smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value;
5991 	u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value,
5992 	    umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value;
5993 	struct bpf_sanitize_info info = {};
5994 	u8 opcode = BPF_OP(insn->code);
5995 	u32 dst = insn->dst_reg;
5996 	int ret;
5997 
5998 	dst_reg = &regs[dst];
5999 
6000 	if ((known && (smin_val != smax_val || umin_val != umax_val)) ||
6001 	    smin_val > smax_val || umin_val > umax_val) {
6002 		/* Taint dst register if offset had invalid bounds derived from
6003 		 * e.g. dead branches.
6004 		 */
6005 		__mark_reg_unknown(env, dst_reg);
6006 		return 0;
6007 	}
6008 
6009 	if (BPF_CLASS(insn->code) != BPF_ALU64) {
6010 		/* 32-bit ALU ops on pointers produce (meaningless) scalars */
6011 		if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6012 			__mark_reg_unknown(env, dst_reg);
6013 			return 0;
6014 		}
6015 
6016 		verbose(env,
6017 			"R%d 32-bit pointer arithmetic prohibited\n",
6018 			dst);
6019 		return -EACCES;
6020 	}
6021 
6022 	switch (ptr_reg->type) {
6023 	case PTR_TO_MAP_VALUE_OR_NULL:
6024 		verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n",
6025 			dst, reg_type_str[ptr_reg->type]);
6026 		return -EACCES;
6027 	case CONST_PTR_TO_MAP:
6028 		/* smin_val represents the known value */
6029 		if (known && smin_val == 0 && opcode == BPF_ADD)
6030 			break;
6031 		fallthrough;
6032 	case PTR_TO_PACKET_END:
6033 	case PTR_TO_SOCKET:
6034 	case PTR_TO_SOCK_COMMON:
6035 	case PTR_TO_TCP_SOCK:
6036 	case PTR_TO_XDP_SOCK:
6037 reject:
6038 		verbose(env, "R%d pointer arithmetic on %s prohibited\n",
6039 			dst, reg_type_str[ptr_reg->type]);
6040 		return -EACCES;
6041 	default:
6042 		if (reg_type_may_be_null(ptr_reg->type))
6043 			goto reject;
6044 		break;
6045 	}
6046 
6047 	/* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6048 	 * The id may be overwritten later if we create a new variable offset.
6049 	 */
6050 	dst_reg->type = ptr_reg->type;
6051 	dst_reg->id = ptr_reg->id;
6052 
6053 	if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) ||
6054 	    !check_reg_sane_offset(env, ptr_reg, ptr_reg->type))
6055 		return -EINVAL;
6056 
6057 	/* pointer types do not carry 32-bit bounds at the moment. */
6058 	__mark_reg32_unbounded(dst_reg);
6059 
6060 	if (sanitize_needed(opcode)) {
6061 		ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg,
6062 				       &info, false);
6063 		if (ret < 0)
6064 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
6065 	}
6066 
6067 	switch (opcode) {
6068 	case BPF_ADD:
6069 		/* We can take a fixed offset as long as it doesn't overflow
6070 		 * the s32 'off' field
6071 		 */
6072 		if (known && (ptr_reg->off + smin_val ==
6073 			      (s64)(s32)(ptr_reg->off + smin_val))) {
6074 			/* pointer += K.  Accumulate it into fixed offset */
6075 			dst_reg->smin_value = smin_ptr;
6076 			dst_reg->smax_value = smax_ptr;
6077 			dst_reg->umin_value = umin_ptr;
6078 			dst_reg->umax_value = umax_ptr;
6079 			dst_reg->var_off = ptr_reg->var_off;
6080 			dst_reg->off = ptr_reg->off + smin_val;
6081 			dst_reg->raw = ptr_reg->raw;
6082 			break;
6083 		}
6084 		/* A new variable offset is created.  Note that off_reg->off
6085 		 * == 0, since it's a scalar.
6086 		 * dst_reg gets the pointer type and since some positive
6087 		 * integer value was added to the pointer, give it a new 'id'
6088 		 * if it's a PTR_TO_PACKET.
6089 		 * this creates a new 'base' pointer, off_reg (variable) gets
6090 		 * added into the variable offset, and we copy the fixed offset
6091 		 * from ptr_reg.
6092 		 */
6093 		if (signed_add_overflows(smin_ptr, smin_val) ||
6094 		    signed_add_overflows(smax_ptr, smax_val)) {
6095 			dst_reg->smin_value = S64_MIN;
6096 			dst_reg->smax_value = S64_MAX;
6097 		} else {
6098 			dst_reg->smin_value = smin_ptr + smin_val;
6099 			dst_reg->smax_value = smax_ptr + smax_val;
6100 		}
6101 		if (umin_ptr + umin_val < umin_ptr ||
6102 		    umax_ptr + umax_val < umax_ptr) {
6103 			dst_reg->umin_value = 0;
6104 			dst_reg->umax_value = U64_MAX;
6105 		} else {
6106 			dst_reg->umin_value = umin_ptr + umin_val;
6107 			dst_reg->umax_value = umax_ptr + umax_val;
6108 		}
6109 		dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6110 		dst_reg->off = ptr_reg->off;
6111 		dst_reg->raw = ptr_reg->raw;
6112 		if (reg_is_pkt_pointer(ptr_reg)) {
6113 			dst_reg->id = ++env->id_gen;
6114 			/* something was added to pkt_ptr, set range to zero */
6115 			dst_reg->raw = 0;
6116 		}
6117 		break;
6118 	case BPF_SUB:
6119 		if (dst_reg == off_reg) {
6120 			/* scalar -= pointer.  Creates an unknown scalar */
6121 			verbose(env, "R%d tried to subtract pointer from scalar\n",
6122 				dst);
6123 			return -EACCES;
6124 		}
6125 		/* We don't allow subtraction from FP, because (according to
6126 		 * test_verifier.c test "invalid fp arithmetic", JITs might not
6127 		 * be able to deal with it.
6128 		 */
6129 		if (ptr_reg->type == PTR_TO_STACK) {
6130 			verbose(env, "R%d subtraction from stack pointer prohibited\n",
6131 				dst);
6132 			return -EACCES;
6133 		}
6134 		if (known && (ptr_reg->off - smin_val ==
6135 			      (s64)(s32)(ptr_reg->off - smin_val))) {
6136 			/* pointer -= K.  Subtract it from fixed offset */
6137 			dst_reg->smin_value = smin_ptr;
6138 			dst_reg->smax_value = smax_ptr;
6139 			dst_reg->umin_value = umin_ptr;
6140 			dst_reg->umax_value = umax_ptr;
6141 			dst_reg->var_off = ptr_reg->var_off;
6142 			dst_reg->id = ptr_reg->id;
6143 			dst_reg->off = ptr_reg->off - smin_val;
6144 			dst_reg->raw = ptr_reg->raw;
6145 			break;
6146 		}
6147 		/* A new variable offset is created.  If the subtrahend is known
6148 		 * nonnegative, then any reg->range we had before is still good.
6149 		 */
6150 		if (signed_sub_overflows(smin_ptr, smax_val) ||
6151 		    signed_sub_overflows(smax_ptr, smin_val)) {
6152 			/* Overflow possible, we know nothing */
6153 			dst_reg->smin_value = S64_MIN;
6154 			dst_reg->smax_value = S64_MAX;
6155 		} else {
6156 			dst_reg->smin_value = smin_ptr - smax_val;
6157 			dst_reg->smax_value = smax_ptr - smin_val;
6158 		}
6159 		if (umin_ptr < umax_val) {
6160 			/* Overflow possible, we know nothing */
6161 			dst_reg->umin_value = 0;
6162 			dst_reg->umax_value = U64_MAX;
6163 		} else {
6164 			/* Cannot overflow (as long as bounds are consistent) */
6165 			dst_reg->umin_value = umin_ptr - umax_val;
6166 			dst_reg->umax_value = umax_ptr - umin_val;
6167 		}
6168 		dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6169 		dst_reg->off = ptr_reg->off;
6170 		dst_reg->raw = ptr_reg->raw;
6171 		if (reg_is_pkt_pointer(ptr_reg)) {
6172 			dst_reg->id = ++env->id_gen;
6173 			/* a possibly negative value was subtracted from pkt_ptr, so clear the range */
6174 			if (smin_val < 0)
6175 				dst_reg->raw = 0;
6176 		}
6177 		break;
6178 	case BPF_AND:
6179 	case BPF_OR:
6180 	case BPF_XOR:
6181 		/* bitwise ops on pointers are troublesome, prohibit. */
6182 		verbose(env, "R%d bitwise operator %s on pointer prohibited\n",
6183 			dst, bpf_alu_string[opcode >> 4]);
6184 		return -EACCES;
6185 	default:
6186 		/* other operators (e.g. MUL,LSH) produce non-pointer results */
6187 		verbose(env, "R%d pointer arithmetic with %s operator prohibited\n",
6188 			dst, bpf_alu_string[opcode >> 4]);
6189 		return -EACCES;
6190 	}
6191 
6192 	if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
6193 		return -EINVAL;
6194 	reg_bounds_sync(dst_reg);
6195 	if (sanitize_check_bounds(env, insn, dst_reg) < 0)
6196 		return -EACCES;
6197 	if (sanitize_needed(opcode)) {
6198 		ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg,
6199 				       &info, true);
6200 		if (ret < 0)
6201 			return sanitize_err(env, insn, ret, off_reg, dst_reg);
6202 	}
6203 
6204 	return 0;
6205 }
6206 
6207 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg,
6208 				 struct bpf_reg_state *src_reg)
6209 {
6210 	s32 smin_val = src_reg->s32_min_value;
6211 	s32 smax_val = src_reg->s32_max_value;
6212 	u32 umin_val = src_reg->u32_min_value;
6213 	u32 umax_val = src_reg->u32_max_value;
6214 
6215 	if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6216 	    signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6217 		dst_reg->s32_min_value = S32_MIN;
6218 		dst_reg->s32_max_value = S32_MAX;
6219 	} else {
6220 		dst_reg->s32_min_value += smin_val;
6221 		dst_reg->s32_max_value += smax_val;
6222 	}
6223 	if (dst_reg->u32_min_value + umin_val < umin_val ||
6224 	    dst_reg->u32_max_value + umax_val < umax_val) {
6225 		dst_reg->u32_min_value = 0;
6226 		dst_reg->u32_max_value = U32_MAX;
6227 	} else {
6228 		dst_reg->u32_min_value += umin_val;
6229 		dst_reg->u32_max_value += umax_val;
6230 	}
6231 }
6232 
6233 static void scalar_min_max_add(struct bpf_reg_state *dst_reg,
6234 			       struct bpf_reg_state *src_reg)
6235 {
6236 	s64 smin_val = src_reg->smin_value;
6237 	s64 smax_val = src_reg->smax_value;
6238 	u64 umin_val = src_reg->umin_value;
6239 	u64 umax_val = src_reg->umax_value;
6240 
6241 	if (signed_add_overflows(dst_reg->smin_value, smin_val) ||
6242 	    signed_add_overflows(dst_reg->smax_value, smax_val)) {
6243 		dst_reg->smin_value = S64_MIN;
6244 		dst_reg->smax_value = S64_MAX;
6245 	} else {
6246 		dst_reg->smin_value += smin_val;
6247 		dst_reg->smax_value += smax_val;
6248 	}
6249 	if (dst_reg->umin_value + umin_val < umin_val ||
6250 	    dst_reg->umax_value + umax_val < umax_val) {
6251 		dst_reg->umin_value = 0;
6252 		dst_reg->umax_value = U64_MAX;
6253 	} else {
6254 		dst_reg->umin_value += umin_val;
6255 		dst_reg->umax_value += umax_val;
6256 	}
6257 }
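/* A small worked example of the tracking above (illustrative only; the
 * register values below are made up): for a 64-bit R1 += R2 with
 *   R1: smin=-11, smax=-2, umin=U64_MAX-10, umax=U64_MAX-1
 *   R2: smin=5,   smax=20, umin=5,          umax=20
 * neither signed addition overflows, so the signed bounds become [-6, 18];
 * the unsigned umax addition wraps, so the unsigned bounds are reset to
 * [0, U64_MAX] and are only tightened again later from var_off (e.g. via
 * reg_bounds_sync() in the caller).
 */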
6258 
6259 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg,
6260 				 struct bpf_reg_state *src_reg)
6261 {
6262 	s32 smin_val = src_reg->s32_min_value;
6263 	s32 smax_val = src_reg->s32_max_value;
6264 	u32 umin_val = src_reg->u32_min_value;
6265 	u32 umax_val = src_reg->u32_max_value;
6266 
6267 	if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6268 	    signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6269 		/* Overflow possible, we know nothing */
6270 		dst_reg->s32_min_value = S32_MIN;
6271 		dst_reg->s32_max_value = S32_MAX;
6272 	} else {
6273 		dst_reg->s32_min_value -= smax_val;
6274 		dst_reg->s32_max_value -= smin_val;
6275 	}
6276 	if (dst_reg->u32_min_value < umax_val) {
6277 		/* Overflow possible, we know nothing */
6278 		dst_reg->u32_min_value = 0;
6279 		dst_reg->u32_max_value = U32_MAX;
6280 	} else {
6281 		/* Cannot overflow (as long as bounds are consistent) */
6282 		dst_reg->u32_min_value -= umax_val;
6283 		dst_reg->u32_max_value -= umin_val;
6284 	}
6285 }
6286 
6287 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg,
6288 			       struct bpf_reg_state *src_reg)
6289 {
6290 	s64 smin_val = src_reg->smin_value;
6291 	s64 smax_val = src_reg->smax_value;
6292 	u64 umin_val = src_reg->umin_value;
6293 	u64 umax_val = src_reg->umax_value;
6294 
6295 	if (signed_sub_overflows(dst_reg->smin_value, smax_val) ||
6296 	    signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6297 		/* Overflow possible, we know nothing */
6298 		dst_reg->smin_value = S64_MIN;
6299 		dst_reg->smax_value = S64_MAX;
6300 	} else {
6301 		dst_reg->smin_value -= smax_val;
6302 		dst_reg->smax_value -= smin_val;
6303 	}
6304 	if (dst_reg->umin_value < umax_val) {
6305 		/* Overflow possible, we know nothing */
6306 		dst_reg->umin_value = 0;
6307 		dst_reg->umax_value = U64_MAX;
6308 	} else {
6309 		/* Cannot overflow (as long as bounds are consistent) */
6310 		dst_reg->umin_value -= umax_val;
6311 		dst_reg->umax_value -= umin_val;
6312 	}
6313 }
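/* Illustrative example for the unsigned branch above (values made up):
 * for R1 -= R2 with R1: umin=10, umax=100 and R2: umin=0, umax=50, we
 * have umin(R1)=10 < umax(R2)=50, so the subtraction could wrap below
 * zero and the unsigned bounds are reset to [0, U64_MAX].  With
 * R2: umin=1, umax=5 instead, no wrap is possible and the result is the
 * tight range [10 - 5, 100 - 1] = [5, 99].
 */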
6314 
6315 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg,
6316 				 struct bpf_reg_state *src_reg)
6317 {
6318 	s32 smin_val = src_reg->s32_min_value;
6319 	u32 umin_val = src_reg->u32_min_value;
6320 	u32 umax_val = src_reg->u32_max_value;
6321 
6322 	if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6323 		/* Ain't nobody got time to multiply that sign */
6324 		__mark_reg32_unbounded(dst_reg);
6325 		return;
6326 	}
6327 	/* Both values are positive, so we can work with unsigned and
6328 	 * copy the result to signed (unless it exceeds S32_MAX).
6329 	 */
6330 	if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6331 		/* Potential overflow, we know nothing */
6332 		__mark_reg32_unbounded(dst_reg);
6333 		return;
6334 	}
6335 	dst_reg->u32_min_value *= umin_val;
6336 	dst_reg->u32_max_value *= umax_val;
6337 	if (dst_reg->u32_max_value > S32_MAX) {
6338 		/* Overflow possible, we know nothing */
6339 		dst_reg->s32_min_value = S32_MIN;
6340 		dst_reg->s32_max_value = S32_MAX;
6341 	} else {
6342 		dst_reg->s32_min_value = dst_reg->u32_min_value;
6343 		dst_reg->s32_max_value = dst_reg->u32_max_value;
6344 	}
6345 }
6346 
6347 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg,
6348 			       struct bpf_reg_state *src_reg)
6349 {
6350 	s64 smin_val = src_reg->smin_value;
6351 	u64 umin_val = src_reg->umin_value;
6352 	u64 umax_val = src_reg->umax_value;
6353 
6354 	if (smin_val < 0 || dst_reg->smin_value < 0) {
6355 		/* Ain't nobody got time to multiply that sign */
6356 		__mark_reg64_unbounded(dst_reg);
6357 		return;
6358 	}
6359 	/* Both values are positive, so we can work with unsigned and
6360 	 * copy the result to signed (unless it exceeds S64_MAX).
6361 	 */
6362 	if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6363 		/* Potential overflow, we know nothing */
6364 		__mark_reg64_unbounded(dst_reg);
6365 		return;
6366 	}
6367 	dst_reg->umin_value *= umin_val;
6368 	dst_reg->umax_value *= umax_val;
6369 	if (dst_reg->umax_value > S64_MAX) {
6370 		/* Overflow possible, we know nothing */
6371 		dst_reg->smin_value = S64_MIN;
6372 		dst_reg->smax_value = S64_MAX;
6373 	} else {
6374 		dst_reg->smin_value = dst_reg->umin_value;
6375 		dst_reg->smax_value = dst_reg->umax_value;
6376 	}
6377 }
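/* Rough sketch of why the U32_MAX cap above is enough (illustrative
 * only): if both operands are known non-negative and each umax fits in
 * 32 bits, the product is at most (2^32 - 1) * (2^32 - 1) < 2^64, so the
 * u64 multiplications cannot wrap.  E.g. R1 in [2, 1000] times R2 in
 * [3, 70000] gives unsigned bounds [6, 70000000], which also fits in
 * s64, so the signed bounds are copied from the unsigned ones.
 */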
6378 
6379 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg,
6380 				 struct bpf_reg_state *src_reg)
6381 {
6382 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
6383 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6384 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6385 	s32 smin_val = src_reg->s32_min_value;
6386 	u32 umax_val = src_reg->u32_max_value;
6387 
6388 	if (src_known && dst_known) {
6389 		__mark_reg32_known(dst_reg, var32_off.value);
6390 		return;
6391 	}
6392 
6393 	/* We get our minimum from the var_off, since that's inherently
6394 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
6395 	 */
6396 	dst_reg->u32_min_value = var32_off.value;
6397 	dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6398 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6399 		/* Lose signed bounds when ANDing negative numbers,
6400 		 * ain't nobody got time for that.
6401 		 */
6402 		dst_reg->s32_min_value = S32_MIN;
6403 		dst_reg->s32_max_value = S32_MAX;
6404 	} else {
6405 		/* ANDing two positives gives a positive, so safe to
6406 		 * cast result into s32.
6407 		 */
6408 		dst_reg->s32_min_value = dst_reg->u32_min_value;
6409 		dst_reg->s32_max_value = dst_reg->u32_max_value;
6410 	}
6411 }
6412 
6413 static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
6414 			       struct bpf_reg_state *src_reg)
6415 {
6416 	bool src_known = tnum_is_const(src_reg->var_off);
6417 	bool dst_known = tnum_is_const(dst_reg->var_off);
6418 	s64 smin_val = src_reg->smin_value;
6419 	u64 umax_val = src_reg->umax_value;
6420 
6421 	if (src_known && dst_known) {
6422 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
6423 		return;
6424 	}
6425 
6426 	/* We get our minimum from the var_off, since that's inherently
6427 	 * bitwise.  Our maximum is the minimum of the operands' maxima.
6428 	 */
6429 	dst_reg->umin_value = dst_reg->var_off.value;
6430 	dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6431 	if (dst_reg->smin_value < 0 || smin_val < 0) {
6432 		/* Lose signed bounds when ANDing negative numbers,
6433 		 * ain't nobody got time for that.
6434 		 */
6435 		dst_reg->smin_value = S64_MIN;
6436 		dst_reg->smax_value = S64_MAX;
6437 	} else {
6438 		/* ANDing two positives gives a positive, so safe to
6439 		 * cast result into s64.
6440 		 */
6441 		dst_reg->smin_value = dst_reg->umin_value;
6442 		dst_reg->smax_value = dst_reg->umax_value;
6443 	}
6444 	/* We may learn something more from the var_off */
6445 	__update_reg_bounds(dst_reg);
6446 }
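/* Worked example for the AND bounds above (illustrative only): with R1
 * known only to satisfy R1 <= 0xff (var_off value=0, mask=0xff) and a
 * constant R2 = 0xf0, the caller's tnum_and() gives the result
 * value=0, mask=0xf0, so umin = var_off.value = 0 and
 * umax = min(0xff, 0xf0) = 0xf0.  Both operands are non-negative, so the
 * signed bounds mirror the unsigned ones: [0, 0xf0].
 */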
6447 
6448 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg,
6449 				struct bpf_reg_state *src_reg)
6450 {
6451 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
6452 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6453 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6454 	s32 smin_val = src_reg->s32_min_value;
6455 	u32 umin_val = src_reg->u32_min_value;
6456 
6457 	if (src_known && dst_known) {
6458 		__mark_reg32_known(dst_reg, var32_off.value);
6459 		return;
6460 	}
6461 
6462 	/* We get our maximum from the var_off, and our minimum is the
6463 	 * maximum of the operands' minima
6464 	 */
6465 	dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6466 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6467 	if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6468 		/* Lose signed bounds when ORing negative numbers,
6469 		 * ain't nobody got time for that.
6470 		 */
6471 		dst_reg->s32_min_value = S32_MIN;
6472 		dst_reg->s32_max_value = S32_MAX;
6473 	} else {
6474 		/* ORing two positives gives a positive, so safe to
6475 		 * cast result into s32.
6476 		 */
6477 		dst_reg->s32_min_value = dst_reg->u32_min_value;
6478 		dst_reg->s32_max_value = dst_reg->u32_max_value;
6479 	}
6480 }
6481 
6482 static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
6483 			      struct bpf_reg_state *src_reg)
6484 {
6485 	bool src_known = tnum_is_const(src_reg->var_off);
6486 	bool dst_known = tnum_is_const(dst_reg->var_off);
6487 	s64 smin_val = src_reg->smin_value;
6488 	u64 umin_val = src_reg->umin_value;
6489 
6490 	if (src_known && dst_known) {
6491 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
6492 		return;
6493 	}
6494 
6495 	/* We get our maximum from the var_off, and our minimum is the
6496 	 * maximum of the operands' minima
6497 	 */
6498 	dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6499 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6500 	if (dst_reg->smin_value < 0 || smin_val < 0) {
6501 		/* Lose signed bounds when ORing negative numbers,
6502 		 * ain't nobody got time for that.
6503 		 */
6504 		dst_reg->smin_value = S64_MIN;
6505 		dst_reg->smax_value = S64_MAX;
6506 	} else {
6507 		/* ORing two positives gives a positive, so safe to
6508 		 * cast result into s64.
6509 		 */
6510 		dst_reg->smin_value = dst_reg->umin_value;
6511 		dst_reg->smax_value = dst_reg->umax_value;
6512 	}
6513 	/* We may learn something more from the var_off */
6514 	__update_reg_bounds(dst_reg);
6515 }
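/* Worked example for the OR bounds above (illustrative only): with R1 in
 * [0x10, 0x1f] (var_off value=0x10, mask=0x0f) and a constant R2 = 0x40,
 * the caller's tnum_or() gives value=0x50, mask=0x0f, so
 * umax = value | mask = 0x5f and umin = max(0x10, 0x40) = 0x40; the
 * final __update_reg_bounds() can then raise umin to var_off.value =
 * 0x50.  Both operands are non-negative, so the signed bounds follow
 * the unsigned ones.
 */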
6516 
6517 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
6518 				 struct bpf_reg_state *src_reg)
6519 {
6520 	bool src_known = tnum_subreg_is_const(src_reg->var_off);
6521 	bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6522 	struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6523 	s32 smin_val = src_reg->s32_min_value;
6524 
6525 	if (src_known && dst_known) {
6526 		__mark_reg32_known(dst_reg, var32_off.value);
6527 		return;
6528 	}
6529 
6530 	/* We get both minimum and maximum from the var32_off. */
6531 	dst_reg->u32_min_value = var32_off.value;
6532 	dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6533 
6534 	if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6535 		/* XORing two positive sign numbers gives a positive,
6536 		 * so safe to cast u32 result into s32.
6537 		 */
6538 		dst_reg->s32_min_value = dst_reg->u32_min_value;
6539 		dst_reg->s32_max_value = dst_reg->u32_max_value;
6540 	} else {
6541 		dst_reg->s32_min_value = S32_MIN;
6542 		dst_reg->s32_max_value = S32_MAX;
6543 	}
6544 }
6545 
6546 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
6547 			       struct bpf_reg_state *src_reg)
6548 {
6549 	bool src_known = tnum_is_const(src_reg->var_off);
6550 	bool dst_known = tnum_is_const(dst_reg->var_off);
6551 	s64 smin_val = src_reg->smin_value;
6552 
6553 	if (src_known && dst_known) {
6554 		/* dst_reg->var_off.value has been updated earlier */
6555 		__mark_reg_known(dst_reg, dst_reg->var_off.value);
6556 		return;
6557 	}
6558 
6559 	/* We get both minimum and maximum from the var_off. */
6560 	dst_reg->umin_value = dst_reg->var_off.value;
6561 	dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6562 
6563 	if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6564 		/* XORing two positive sign numbers gives a positive,
6565 		 * so safe to cast u64 result into s64.
6566 		 */
6567 		dst_reg->smin_value = dst_reg->umin_value;
6568 		dst_reg->smax_value = dst_reg->umax_value;
6569 	} else {
6570 		dst_reg->smin_value = S64_MIN;
6571 		dst_reg->smax_value = S64_MAX;
6572 	}
6573 
6574 	__update_reg_bounds(dst_reg);
6575 }
6576 
6577 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6578 				   u64 umin_val, u64 umax_val)
6579 {
6580 	/* We lose all sign bit information (except what we can pick
6581 	 * up from var_off)
6582 	 */
6583 	dst_reg->s32_min_value = S32_MIN;
6584 	dst_reg->s32_max_value = S32_MAX;
6585 	/* If we might shift our top bit out, then we know nothing */
6586 	if (umax_val > 31 || dst_reg->u32_max_value > 1ULL << (31 - umax_val)) {
6587 		dst_reg->u32_min_value = 0;
6588 		dst_reg->u32_max_value = U32_MAX;
6589 	} else {
6590 		dst_reg->u32_min_value <<= umin_val;
6591 		dst_reg->u32_max_value <<= umax_val;
6592 	}
6593 }
6594 
6595 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
6596 				 struct bpf_reg_state *src_reg)
6597 {
6598 	u32 umax_val = src_reg->u32_max_value;
6599 	u32 umin_val = src_reg->u32_min_value;
6600 	/* u32 alu operation will zext upper bits */
6601 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
6602 
6603 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6604 	dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6605 	/* Not strictly required, but to be careful mark the reg64 bounds as
6606 	 * unknown so that we are forced to pick them up from the tnum and
6607 	 * zext later; if some path skips this step we are still safe.
6608 	 */
6609 	__mark_reg64_unbounded(dst_reg);
6610 	__update_reg32_bounds(dst_reg);
6611 }
6612 
6613 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg,
6614 				   u64 umin_val, u64 umax_val)
6615 {
6616 	/* Special case <<32 because it is a common compiler pattern to sign
6617 	 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6618 	 * positive we know this shift will also be positive so we can track
6619 	 * bounds correctly. Otherwise we lose all sign bit information except
6620 	 * what we can pick up from var_off. Perhaps we can generalize this
6621 	 * later to shifts of any length.
6622 	 */
6623 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_max_value >= 0)
6624 		dst_reg->smax_value = (s64)dst_reg->s32_max_value << 32;
6625 	else
6626 		dst_reg->smax_value = S64_MAX;
6627 
6628 	if (umin_val == 32 && umax_val == 32 && dst_reg->s32_min_value >= 0)
6629 		dst_reg->smin_value = (s64)dst_reg->s32_min_value << 32;
6630 	else
6631 		dst_reg->smin_value = S64_MIN;
6632 
6633 	/* If we might shift our top bit out, then we know nothing */
6634 	if (dst_reg->umax_value > 1ULL << (63 - umax_val)) {
6635 		dst_reg->umin_value = 0;
6636 		dst_reg->umax_value = U64_MAX;
6637 	} else {
6638 		dst_reg->umin_value <<= umin_val;
6639 		dst_reg->umax_value <<= umax_val;
6640 	}
6641 }
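/* Illustrative example of the <<32 special case above: compilers commonly
 * sign-extend a subregister with "r1 <<= 32; r1 s>>= 32".  If the 32-bit
 * bounds before the shift are e.g. [0, 100], a shift by exactly 32 gives
 * signed 64-bit bounds [0, 100 << 32] = [0, 0x6400000000]; for any other
 * or variable shift amount the signed bounds are conservatively reset to
 * [S64_MIN, S64_MAX].
 */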
6642 
6643 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg,
6644 			       struct bpf_reg_state *src_reg)
6645 {
6646 	u64 umax_val = src_reg->umax_value;
6647 	u64 umin_val = src_reg->umin_value;
6648 
6649 	/* scalar64 calc uses 32bit unshifted bounds so must be called first */
6650 	__scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6651 	__scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6652 
6653 	dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6654 	/* We may learn something more from the var_off */
6655 	__update_reg_bounds(dst_reg);
6656 }
6657 
6658 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg,
6659 				 struct bpf_reg_state *src_reg)
6660 {
6661 	struct tnum subreg = tnum_subreg(dst_reg->var_off);
6662 	u32 umax_val = src_reg->u32_max_value;
6663 	u32 umin_val = src_reg->u32_min_value;
6664 
6665 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6666 	 * be negative, then either:
6667 	 * 1) src_reg might be zero, so the sign bit of the result is
6668 	 *    unknown, so we lose our signed bounds
6669 	 * 2) it's known negative, thus the unsigned bounds capture the
6670 	 *    signed bounds
6671 	 * 3) the signed bounds cross zero, so they tell us nothing
6672 	 *    about the result
6673 	 * If the value in dst_reg is known nonnegative, then again the
6674 	 * unsigned bounds capture the signed bounds.
6675 	 * Thus, in all cases it suffices to blow away our signed bounds
6676 	 * and rely on inferring new ones from the unsigned bounds and
6677 	 * var_off of the result.
6678 	 */
6679 	dst_reg->s32_min_value = S32_MIN;
6680 	dst_reg->s32_max_value = S32_MAX;
6681 
6682 	dst_reg->var_off = tnum_rshift(subreg, umin_val);
6683 	dst_reg->u32_min_value >>= umax_val;
6684 	dst_reg->u32_max_value >>= umin_val;
6685 
6686 	__mark_reg64_unbounded(dst_reg);
6687 	__update_reg32_bounds(dst_reg);
6688 }
6689 
6690 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg,
6691 			       struct bpf_reg_state *src_reg)
6692 {
6693 	u64 umax_val = src_reg->umax_value;
6694 	u64 umin_val = src_reg->umin_value;
6695 
6696 	/* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6697 	 * be negative, then either:
6698 	 * 1) src_reg might be zero, so the sign bit of the result is
6699 	 *    unknown, so we lose our signed bounds
6700 	 * 2) it's known negative, thus the unsigned bounds capture the
6701 	 *    signed bounds
6702 	 * 3) the signed bounds cross zero, so they tell us nothing
6703 	 *    about the result
6704 	 * If the value in dst_reg is known nonnegative, then again the
6705 	 * unsigned bounds capture the signed bounds.
6706 	 * Thus, in all cases it suffices to blow away our signed bounds
6707 	 * and rely on inferring new ones from the unsigned bounds and
6708 	 * var_off of the result.
6709 	 */
6710 	dst_reg->smin_value = S64_MIN;
6711 	dst_reg->smax_value = S64_MAX;
6712 	dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6713 	dst_reg->umin_value >>= umax_val;
6714 	dst_reg->umax_value >>= umin_val;
6715 
6716 	/* It's not easy to operate on alu32 bounds here because it depends
6717 	 * on bits being shifted in. Take easy way out and mark unbounded
6718 	 * so we can recalculate later from tnum.
6719 	 */
6720 	__mark_reg32_unbounded(dst_reg);
6721 	__update_reg_bounds(dst_reg);
6722 }
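/* Worked example of why the signed bounds are discarded above (values
 * made up): with R1 in signed range [-4, 10], "r1 >>= 1" can produce
 * both 5 (from 10) and 0x7ffffffffffffffe (from -4 viewed as unsigned),
 * so no useful signed interval survives; only the unsigned bounds and
 * the shifted var_off are kept, and the signed bounds are re-derived
 * from them afterwards.
 */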
6723 
6724 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg,
6725 				  struct bpf_reg_state *src_reg)
6726 {
6727 	u64 umin_val = src_reg->u32_min_value;
6728 
6729 	/* Upon reaching here, src_known is true and
6730 	 * umax_val is equal to umin_val.
6731 	 */
6732 	dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6733 	dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6734 
6735 	dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 32);
6736 
6737 	/* blow away the dst_reg umin_value/umax_value and rely on
6738 	 * dst_reg var_off to refine the result.
6739 	 */
6740 	dst_reg->u32_min_value = 0;
6741 	dst_reg->u32_max_value = U32_MAX;
6742 
6743 	__mark_reg64_unbounded(dst_reg);
6744 	__update_reg32_bounds(dst_reg);
6745 }
6746 
6747 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg,
6748 				struct bpf_reg_state *src_reg)
6749 {
6750 	u64 umin_val = src_reg->umin_value;
6751 
6752 	/* Upon reaching here, src_known is true and umax_val is equal
6753 	 * to umin_val.
6754 	 */
6755 	dst_reg->smin_value >>= umin_val;
6756 	dst_reg->smax_value >>= umin_val;
6757 
6758 	dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 64);
6759 
6760 	/* blow away the dst_reg umin_value/umax_value and rely on
6761 	 * dst_reg var_off to refine the result.
6762 	 */
6763 	dst_reg->umin_value = 0;
6764 	dst_reg->umax_value = U64_MAX;
6765 
6766 	/* It's not easy to operate on alu32 bounds here because it depends
6767 	 * on bits being shifted in from upper 32-bits. Take easy way out
6768 	 * and mark unbounded so we can recalculate later from tnum.
6769 	 */
6770 	__mark_reg32_unbounded(dst_reg);
6771 	__update_reg_bounds(dst_reg);
6772 }
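/* Illustrative example for the arithmetic shift above: src is a known
 * constant at this point (src_known), so for "r1 s>>= 2" with signed
 * bounds [-100, 60] the result's signed bounds are simply
 * [-100 >> 2, 60 >> 2] = [-25, 15], while the unsigned bounds are
 * dropped and recomputed later from the shifted var_off.
 */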
6773 
6774 /* WARNING: This function does calculations on 64-bit values, but the actual
6775  * execution may occur on 32-bit values. Therefore, things like bitshifts
6776  * need extra checks in the 32-bit case.
6777  */
6778 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
6779 				      struct bpf_insn *insn,
6780 				      struct bpf_reg_state *dst_reg,
6781 				      struct bpf_reg_state src_reg)
6782 {
6783 	struct bpf_reg_state *regs = cur_regs(env);
6784 	u8 opcode = BPF_OP(insn->code);
6785 	bool src_known;
6786 	s64 smin_val, smax_val;
6787 	u64 umin_val, umax_val;
6788 	s32 s32_min_val, s32_max_val;
6789 	u32 u32_min_val, u32_max_val;
6790 	u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32;
6791 	bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6792 	int ret;
6793 
6794 	smin_val = src_reg.smin_value;
6795 	smax_val = src_reg.smax_value;
6796 	umin_val = src_reg.umin_value;
6797 	umax_val = src_reg.umax_value;
6798 
6799 	s32_min_val = src_reg.s32_min_value;
6800 	s32_max_val = src_reg.s32_max_value;
6801 	u32_min_val = src_reg.u32_min_value;
6802 	u32_max_val = src_reg.u32_max_value;
6803 
6804 	if (alu32) {
6805 		src_known = tnum_subreg_is_const(src_reg.var_off);
6806 		if ((src_known &&
6807 		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
6808 		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
6809 			/* Taint dst register if offset had invalid bounds
6810 			 * derived from e.g. dead branches.
6811 			 */
6812 			__mark_reg_unknown(env, dst_reg);
6813 			return 0;
6814 		}
6815 	} else {
6816 		src_known = tnum_is_const(src_reg.var_off);
6817 		if ((src_known &&
6818 		     (smin_val != smax_val || umin_val != umax_val)) ||
6819 		    smin_val > smax_val || umin_val > umax_val) {
6820 			/* Taint dst register if offset had invalid bounds
6821 			 * derived from e.g. dead branches.
6822 			 */
6823 			__mark_reg_unknown(env, dst_reg);
6824 			return 0;
6825 		}
6826 	}
6827 
6828 	if (!src_known &&
6829 	    opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6830 		__mark_reg_unknown(env, dst_reg);
6831 		return 0;
6832 	}
6833 
6834 	if (sanitize_needed(opcode)) {
6835 		ret = sanitize_val_alu(env, insn);
6836 		if (ret < 0)
6837 			return sanitize_err(env, insn, ret, NULL, NULL);
6838 	}
6839 
6840 	/* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6841 	 * There are two classes of instructions: for the first class we track
6842 	 * both alu32 and alu64 sign/unsigned bounds independently; this
6843 	 * provides the greatest amount of precision when alu operations are
6844 	 * mixed with jmp32 operations. These operations are BPF_ADD, BPF_SUB,
6845 	 * BPF_MUL, BPF_AND, BPF_OR and BPF_XOR. This is possible because these
6846 	 * ops have fairly easy to understand and calculate behavior in both
6847 	 * 32-bit and 64-bit alu ops. See the alu32 verifier tests for examples.
6848 	 * The second class of operations, BPF_LSH, BPF_RSH, and BPF_ARSH, is
6849 	 * not so easy with regard to tracking sign/unsigned bounds because the
6850 	 * bits may cross subreg boundaries in the alu64 case. When this happens
6851 	 * we mark the reg unbounded in the subreg bound space and use the
6852 	 * resulting tnum to calculate an approximation of the sign/unsigned bounds.
6853 	 */
6854 	switch (opcode) {
6855 	case BPF_ADD:
6856 		scalar32_min_max_add(dst_reg, &src_reg);
6857 		scalar_min_max_add(dst_reg, &src_reg);
6858 		dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6859 		break;
6860 	case BPF_SUB:
6861 		scalar32_min_max_sub(dst_reg, &src_reg);
6862 		scalar_min_max_sub(dst_reg, &src_reg);
6863 		dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6864 		break;
6865 	case BPF_MUL:
6866 		dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6867 		scalar32_min_max_mul(dst_reg, &src_reg);
6868 		scalar_min_max_mul(dst_reg, &src_reg);
6869 		break;
6870 	case BPF_AND:
6871 		dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6872 		scalar32_min_max_and(dst_reg, &src_reg);
6873 		scalar_min_max_and(dst_reg, &src_reg);
6874 		break;
6875 	case BPF_OR:
6876 		dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6877 		scalar32_min_max_or(dst_reg, &src_reg);
6878 		scalar_min_max_or(dst_reg, &src_reg);
6879 		break;
6880 	case BPF_XOR:
6881 		dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6882 		scalar32_min_max_xor(dst_reg, &src_reg);
6883 		scalar_min_max_xor(dst_reg, &src_reg);
6884 		break;
6885 	case BPF_LSH:
6886 		if (umax_val >= insn_bitness) {
6887 			/* Shifts greater than 31 or 63 are undefined.
6888 			 * This includes shifts by a negative number.
6889 			 */
6890 			mark_reg_unknown(env, regs, insn->dst_reg);
6891 			break;
6892 		}
6893 		if (alu32)
6894 			scalar32_min_max_lsh(dst_reg, &src_reg);
6895 		else
6896 			scalar_min_max_lsh(dst_reg, &src_reg);
6897 		break;
6898 	case BPF_RSH:
6899 		if (umax_val >= insn_bitness) {
6900 			/* Shifts greater than 31 or 63 are undefined.
6901 			 * This includes shifts by a negative number.
6902 			 */
6903 			mark_reg_unknown(env, regs, insn->dst_reg);
6904 			break;
6905 		}
6906 		if (alu32)
6907 			scalar32_min_max_rsh(dst_reg, &src_reg);
6908 		else
6909 			scalar_min_max_rsh(dst_reg, &src_reg);
6910 		break;
6911 	case BPF_ARSH:
6912 		if (umax_val >= insn_bitness) {
6913 			/* Shifts greater than 31 or 63 are undefined.
6914 			 * This includes shifts by a negative number.
6915 			 */
6916 			mark_reg_unknown(env, regs, insn->dst_reg);
6917 			break;
6918 		}
6919 		if (alu32)
6920 			scalar32_min_max_arsh(dst_reg, &src_reg);
6921 		else
6922 			scalar_min_max_arsh(dst_reg, &src_reg);
6923 		break;
6924 	default:
6925 		mark_reg_unknown(env, regs, insn->dst_reg);
6926 		break;
6927 	}
6928 
6929 	/* ALU32 ops are zero extended into 64bit register */
6930 	if (alu32)
6931 		zext_32_to_64(dst_reg);
6932 	reg_bounds_sync(dst_reg);
6933 	return 0;
6934 }
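/* A rough end-to-end sketch of the alu32 path above (values made up):
 * for "w2 += w3" with w2 in [0, 10] and w3 a known constant 5, the
 * 32-bit bounds become [5, 15]; zext_32_to_64() then models the zero
 * extension that BPF_ALU32 performs on the destination, so the 64-bit
 * bounds also end up as [5, 15], and reg_bounds_sync() keeps all bounds
 * consistent with var_off.
 */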
6935 
6936 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6937  * and var_off.
6938  */
6939 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env,
6940 				   struct bpf_insn *insn)
6941 {
6942 	struct bpf_verifier_state *vstate = env->cur_state;
6943 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
6944 	struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6945 	struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6946 	u8 opcode = BPF_OP(insn->code);
6947 	int err;
6948 
6949 	dst_reg = &regs[insn->dst_reg];
6950 	src_reg = NULL;
6951 	if (dst_reg->type != SCALAR_VALUE)
6952 		ptr_reg = dst_reg;
6953 	else
6954 		/* Make sure ID is cleared otherwise dst_reg min/max could be
6955 		 * incorrectly propagated into other registers by find_equal_scalars()
6956 		 */
6957 		dst_reg->id = 0;
6958 	if (BPF_SRC(insn->code) == BPF_X) {
6959 		src_reg = &regs[insn->src_reg];
6960 		if (src_reg->type != SCALAR_VALUE) {
6961 			if (dst_reg->type != SCALAR_VALUE) {
6962 				/* Combining two pointers by any ALU op yields
6963 				 * an arbitrary scalar. Disallow all math except
6964 				 * pointer subtraction
6965 				 */
6966 				if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6967 					mark_reg_unknown(env, regs, insn->dst_reg);
6968 					return 0;
6969 				}
6970 				verbose(env, "R%d pointer %s pointer prohibited\n",
6971 					insn->dst_reg,
6972 					bpf_alu_string[opcode >> 4]);
6973 				return -EACCES;
6974 			} else {
6975 				/* scalar += pointer
6976 				 * This is legal, but we have to reverse our
6977 				 * src/dest handling in computing the range
6978 				 */
6979 				err = mark_chain_precision(env, insn->dst_reg);
6980 				if (err)
6981 					return err;
6982 				return adjust_ptr_min_max_vals(env, insn,
6983 							       src_reg, dst_reg);
6984 			}
6985 		} else if (ptr_reg) {
6986 			/* pointer += scalar */
6987 			err = mark_chain_precision(env, insn->src_reg);
6988 			if (err)
6989 				return err;
6990 			return adjust_ptr_min_max_vals(env, insn,
6991 						       dst_reg, src_reg);
6992 		}
6993 	} else {
6994 		/* Pretend the src is a reg with a known value, since we only
6995 		 * need to be able to read from this state.
6996 		 */
6997 		off_reg.type = SCALAR_VALUE;
6998 		__mark_reg_known(&off_reg, insn->imm);
6999 		src_reg = &off_reg;
7000 		if (ptr_reg) /* pointer += K */
7001 			return adjust_ptr_min_max_vals(env, insn,
7002 						       ptr_reg, src_reg);
7003 	}
7004 
7005 	/* Got here implies adding two SCALAR_VALUEs */
7006 	if (WARN_ON_ONCE(ptr_reg)) {
7007 		print_verifier_state(env, state);
7008 		verbose(env, "verifier internal error: unexpected ptr_reg\n");
7009 		return -EINVAL;
7010 	}
7011 	if (WARN_ON(!src_reg)) {
7012 		print_verifier_state(env, state);
7013 		verbose(env, "verifier internal error: no src_reg\n");
7014 		return -EINVAL;
7015 	}
7016 	return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
7017 }
7018 
7019 /* check validity of 32-bit and 64-bit arithmetic operations */
7020 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
7021 {
7022 	struct bpf_reg_state *regs = cur_regs(env);
7023 	u8 opcode = BPF_OP(insn->code);
7024 	int err;
7025 
7026 	if (opcode == BPF_END || opcode == BPF_NEG) {
7027 		if (opcode == BPF_NEG) {
7028 			if (BPF_SRC(insn->code) != 0 ||
7029 			    insn->src_reg != BPF_REG_0 ||
7030 			    insn->off != 0 || insn->imm != 0) {
7031 				verbose(env, "BPF_NEG uses reserved fields\n");
7032 				return -EINVAL;
7033 			}
7034 		} else {
7035 			if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
7036 			    (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
7037 			    BPF_CLASS(insn->code) == BPF_ALU64) {
7038 				verbose(env, "BPF_END uses reserved fields\n");
7039 				return -EINVAL;
7040 			}
7041 		}
7042 
7043 		/* check src operand */
7044 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7045 		if (err)
7046 			return err;
7047 
7048 		if (is_pointer_value(env, insn->dst_reg)) {
7049 			verbose(env, "R%d pointer arithmetic prohibited\n",
7050 				insn->dst_reg);
7051 			return -EACCES;
7052 		}
7053 
7054 		/* check dest operand */
7055 		err = check_reg_arg(env, insn->dst_reg, DST_OP);
7056 		if (err)
7057 			return err;
7058 
7059 	} else if (opcode == BPF_MOV) {
7060 
7061 		if (BPF_SRC(insn->code) == BPF_X) {
7062 			if (insn->imm != 0 || insn->off != 0) {
7063 				verbose(env, "BPF_MOV uses reserved fields\n");
7064 				return -EINVAL;
7065 			}
7066 
7067 			/* check src operand */
7068 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
7069 			if (err)
7070 				return err;
7071 		} else {
7072 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7073 				verbose(env, "BPF_MOV uses reserved fields\n");
7074 				return -EINVAL;
7075 			}
7076 		}
7077 
7078 		/* check dest operand, mark as required later */
7079 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7080 		if (err)
7081 			return err;
7082 
7083 		if (BPF_SRC(insn->code) == BPF_X) {
7084 			struct bpf_reg_state *src_reg = regs + insn->src_reg;
7085 			struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7086 
7087 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
7088 				/* case: R1 = R2
7089 				 * copy register state to dest reg
7090 				 */
7091 				if (src_reg->type == SCALAR_VALUE && !src_reg->id)
7092 					/* Assign src and dst registers the same ID
7093 					 * that will be used by find_equal_scalars()
7094 					 * to propagate min/max range.
7095 					 */
7096 					src_reg->id = ++env->id_gen;
7097 				*dst_reg = *src_reg;
7098 				dst_reg->live |= REG_LIVE_WRITTEN;
7099 				dst_reg->subreg_def = DEF_NOT_SUBREG;
7100 			} else {
7101 				/* R1 = (u32) R2 */
7102 				if (is_pointer_value(env, insn->src_reg)) {
7103 					verbose(env,
7104 						"R%d partial copy of pointer\n",
7105 						insn->src_reg);
7106 					return -EACCES;
7107 				} else if (src_reg->type == SCALAR_VALUE) {
7108 					*dst_reg = *src_reg;
7109 					/* Make sure ID is cleared otherwise
7110 					 * dst_reg min/max could be incorrectly
7111 					 * propagated into src_reg by find_equal_scalars()
7112 					 */
7113 					dst_reg->id = 0;
7114 					dst_reg->live |= REG_LIVE_WRITTEN;
7115 					dst_reg->subreg_def = env->insn_idx + 1;
7116 				} else {
7117 					mark_reg_unknown(env, regs,
7118 							 insn->dst_reg);
7119 				}
7120 				zext_32_to_64(dst_reg);
7121 				reg_bounds_sync(dst_reg);
7122 			}
7123 		} else {
7124 			/* case: R = imm
7125 			 * remember the value we stored into this reg
7126 			 */
7127 			/* clear any state __mark_reg_known doesn't set */
7128 			mark_reg_unknown(env, regs, insn->dst_reg);
7129 			regs[insn->dst_reg].type = SCALAR_VALUE;
7130 			if (BPF_CLASS(insn->code) == BPF_ALU64) {
7131 				__mark_reg_known(regs + insn->dst_reg,
7132 						 insn->imm);
7133 			} else {
7134 				__mark_reg_known(regs + insn->dst_reg,
7135 						 (u32)insn->imm);
7136 			}
7137 		}
7138 
7139 	} else if (opcode > BPF_END) {
7140 		verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7141 		return -EINVAL;
7142 
7143 	} else {	/* all other ALU ops: and, sub, xor, add, ... */
7144 
7145 		if (BPF_SRC(insn->code) == BPF_X) {
7146 			if (insn->imm != 0 || insn->off != 0) {
7147 				verbose(env, "BPF_ALU uses reserved fields\n");
7148 				return -EINVAL;
7149 			}
7150 			/* check src1 operand */
7151 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
7152 			if (err)
7153 				return err;
7154 		} else {
7155 			if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7156 				verbose(env, "BPF_ALU uses reserved fields\n");
7157 				return -EINVAL;
7158 			}
7159 		}
7160 
7161 		/* check src2 operand */
7162 		err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7163 		if (err)
7164 			return err;
7165 
7166 		if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
7167 		    BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7168 			verbose(env, "div by zero\n");
7169 			return -EINVAL;
7170 		}
7171 
7172 		if ((opcode == BPF_LSH || opcode == BPF_RSH ||
7173 		     opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
7174 			int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
7175 
7176 			if (insn->imm < 0 || insn->imm >= size) {
7177 				verbose(env, "invalid shift %d\n", insn->imm);
7178 				return -EINVAL;
7179 			}
7180 		}
7181 
7182 		/* check dest operand */
7183 		err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7184 		if (err)
7185 			return err;
7186 
7187 		return adjust_reg_min_max_vals(env, insn);
7188 	}
7189 
7190 	return 0;
7191 }
7192 
7193 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate,
7194 				   struct bpf_reg_state *dst_reg,
7195 				   enum bpf_reg_type type,
7196 				   bool range_right_open)
7197 {
7198 	struct bpf_func_state *state;
7199 	struct bpf_reg_state *reg;
7200 	int new_range;
7201 
7202 	if (dst_reg->off < 0 ||
7203 	    (dst_reg->off == 0 && range_right_open))
7204 		/* This doesn't give us any range */
7205 		return;
7206 
7207 	if (dst_reg->umax_value > MAX_PACKET_OFF ||
7208 	    dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF)
7209 		/* Risk of overflow.  For instance, ptr + (1<<63) may be less
7210 		 * than pkt_end, but that's because it's also less than pkt.
7211 		 */
7212 		return;
7213 
7214 	new_range = dst_reg->off;
7215 	if (range_right_open)
7216 		new_range++;
7217 
7218 	/* Examples for register markings:
7219 	 *
7220 	 * pkt_data in dst register:
7221 	 *
7222 	 *   r2 = r3;
7223 	 *   r2 += 8;
7224 	 *   if (r2 > pkt_end) goto <handle exception>
7225 	 *   <access okay>
7226 	 *
7227 	 *   r2 = r3;
7228 	 *   r2 += 8;
7229 	 *   if (r2 < pkt_end) goto <access okay>
7230 	 *   <handle exception>
7231 	 *
7232 	 *   Where:
7233 	 *     r2 == dst_reg, pkt_end == src_reg
7234 	 *     r2=pkt(id=n,off=8,r=0)
7235 	 *     r3=pkt(id=n,off=0,r=0)
7236 	 *
7237 	 * pkt_data in src register:
7238 	 *
7239 	 *   r2 = r3;
7240 	 *   r2 += 8;
7241 	 *   if (pkt_end >= r2) goto <access okay>
7242 	 *   <handle exception>
7243 	 *
7244 	 *   r2 = r3;
7245 	 *   r2 += 8;
7246 	 *   if (pkt_end <= r2) goto <handle exception>
7247 	 *   <access okay>
7248 	 *
7249 	 *   Where:
7250 	 *     pkt_end == dst_reg, r2 == src_reg
7251 	 *     r2=pkt(id=n,off=8,r=0)
7252 	 *     r3=pkt(id=n,off=0,r=0)
7253 	 *
7254 	 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7255 	 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7256 	 * and [r3, r3 + 8-1) respectively is safe to access depending on
7257 	 * the check.
7258 	 */
7259 
7260 	/* If our ids match, then we must have the same max_value.  And we
7261 	 * don't care about the other reg's fixed offset, since if it's too big
7262 	 * the range won't allow anything.
7263 	 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7264 	 */
7265 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7266 		if (reg->type == type && reg->id == dst_reg->id)
7267 			/* keep the maximum range already checked */
7268 			reg->range = max(reg->range, new_range);
7269 	}));
7270 }
7271 
7272 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7273 {
7274 	struct tnum subreg = tnum_subreg(reg->var_off);
7275 	s32 sval = (s32)val;
7276 
7277 	switch (opcode) {
7278 	case BPF_JEQ:
7279 		if (tnum_is_const(subreg))
7280 			return !!tnum_equals_const(subreg, val);
7281 		break;
7282 	case BPF_JNE:
7283 		if (tnum_is_const(subreg))
7284 			return !tnum_equals_const(subreg, val);
7285 		break;
7286 	case BPF_JSET:
7287 		if ((~subreg.mask & subreg.value) & val)
7288 			return 1;
7289 		if (!((subreg.mask | subreg.value) & val))
7290 			return 0;
7291 		break;
7292 	case BPF_JGT:
7293 		if (reg->u32_min_value > val)
7294 			return 1;
7295 		else if (reg->u32_max_value <= val)
7296 			return 0;
7297 		break;
7298 	case BPF_JSGT:
7299 		if (reg->s32_min_value > sval)
7300 			return 1;
7301 		else if (reg->s32_max_value <= sval)
7302 			return 0;
7303 		break;
7304 	case BPF_JLT:
7305 		if (reg->u32_max_value < val)
7306 			return 1;
7307 		else if (reg->u32_min_value >= val)
7308 			return 0;
7309 		break;
7310 	case BPF_JSLT:
7311 		if (reg->s32_max_value < sval)
7312 			return 1;
7313 		else if (reg->s32_min_value >= sval)
7314 			return 0;
7315 		break;
7316 	case BPF_JGE:
7317 		if (reg->u32_min_value >= val)
7318 			return 1;
7319 		else if (reg->u32_max_value < val)
7320 			return 0;
7321 		break;
7322 	case BPF_JSGE:
7323 		if (reg->s32_min_value >= sval)
7324 			return 1;
7325 		else if (reg->s32_max_value < sval)
7326 			return 0;
7327 		break;
7328 	case BPF_JLE:
7329 		if (reg->u32_max_value <= val)
7330 			return 1;
7331 		else if (reg->u32_min_value > val)
7332 			return 0;
7333 		break;
7334 	case BPF_JSLE:
7335 		if (reg->s32_max_value <= sval)
7336 			return 1;
7337 		else if (reg->s32_min_value > sval)
7338 			return 0;
7339 		break;
7340 	}
7341 
7342 	return -1;
7343 }
7344 
7345 
7346 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7347 {
7348 	s64 sval = (s64)val;
7349 
7350 	switch (opcode) {
7351 	case BPF_JEQ:
7352 		if (tnum_is_const(reg->var_off))
7353 			return !!tnum_equals_const(reg->var_off, val);
7354 		break;
7355 	case BPF_JNE:
7356 		if (tnum_is_const(reg->var_off))
7357 			return !tnum_equals_const(reg->var_off, val);
7358 		break;
7359 	case BPF_JSET:
7360 		if ((~reg->var_off.mask & reg->var_off.value) & val)
7361 			return 1;
7362 		if (!((reg->var_off.mask | reg->var_off.value) & val))
7363 			return 0;
7364 		break;
7365 	case BPF_JGT:
7366 		if (reg->umin_value > val)
7367 			return 1;
7368 		else if (reg->umax_value <= val)
7369 			return 0;
7370 		break;
7371 	case BPF_JSGT:
7372 		if (reg->smin_value > sval)
7373 			return 1;
7374 		else if (reg->smax_value <= sval)
7375 			return 0;
7376 		break;
7377 	case BPF_JLT:
7378 		if (reg->umax_value < val)
7379 			return 1;
7380 		else if (reg->umin_value >= val)
7381 			return 0;
7382 		break;
7383 	case BPF_JSLT:
7384 		if (reg->smax_value < sval)
7385 			return 1;
7386 		else if (reg->smin_value >= sval)
7387 			return 0;
7388 		break;
7389 	case BPF_JGE:
7390 		if (reg->umin_value >= val)
7391 			return 1;
7392 		else if (reg->umax_value < val)
7393 			return 0;
7394 		break;
7395 	case BPF_JSGE:
7396 		if (reg->smin_value >= sval)
7397 			return 1;
7398 		else if (reg->smax_value < sval)
7399 			return 0;
7400 		break;
7401 	case BPF_JLE:
7402 		if (reg->umax_value <= val)
7403 			return 1;
7404 		else if (reg->umin_value > val)
7405 			return 0;
7406 		break;
7407 	case BPF_JSLE:
7408 		if (reg->smax_value <= sval)
7409 			return 1;
7410 		else if (reg->smin_value > sval)
7411 			return 0;
7412 		break;
7413 	}
7414 
7415 	return -1;
7416 }
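/* Worked example for the bounds checks above (illustrative only): for
 * "if r1 > 7 goto ..." with r1 known to be in unsigned range [10, 20],
 * umin_value (10) > 7, so the branch is always taken and 1 is returned;
 * with r1 in [0, 7], umax_value (7) <= 7, so 0 is returned; with r1 in
 * [5, 9] neither bound decides the comparison and -1 (unknown) is
 * returned.
 */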
7417 
7418 /* compute branch direction of the expression "if (reg opcode val) goto target;"
7419  * and return:
7420  *  1 - branch will be taken and "goto target" will be executed
7421  *  0 - branch will not be taken and fall-through to next insn
7422  * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
7423  *      value range is [0,10]
7424  */
7425 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
7426 			   bool is_jmp32)
7427 {
7428 	if (__is_pointer_value(false, reg)) {
7429 		if (!reg_type_not_null(reg->type))
7430 			return -1;
7431 
7432 		/* If pointer is valid tests against zero will fail so we can
7433 		 * use this to direct branch taken.
7434 		 */
7435 		if (val != 0)
7436 			return -1;
7437 
7438 		switch (opcode) {
7439 		case BPF_JEQ:
7440 			return 0;
7441 		case BPF_JNE:
7442 			return 1;
7443 		default:
7444 			return -1;
7445 		}
7446 	}
7447 
7448 	if (is_jmp32)
7449 		return is_branch32_taken(reg, val, opcode);
7450 	return is_branch64_taken(reg, val, opcode);
7451 }
7452 
7453 static int flip_opcode(u32 opcode)
7454 {
7455 	/* How can we transform "a <op> b" into "b <op> a"? */
7456 	static const u8 opcode_flip[16] = {
7457 		/* these stay the same */
7458 		[BPF_JEQ  >> 4] = BPF_JEQ,
7459 		[BPF_JNE  >> 4] = BPF_JNE,
7460 		[BPF_JSET >> 4] = BPF_JSET,
7461 		/* these swap "lesser" and "greater" (L and G in the opcodes) */
7462 		[BPF_JGE  >> 4] = BPF_JLE,
7463 		[BPF_JGT  >> 4] = BPF_JLT,
7464 		[BPF_JLE  >> 4] = BPF_JGE,
7465 		[BPF_JLT  >> 4] = BPF_JGT,
7466 		[BPF_JSGE >> 4] = BPF_JSLE,
7467 		[BPF_JSGT >> 4] = BPF_JSLT,
7468 		[BPF_JSLE >> 4] = BPF_JSGE,
7469 		[BPF_JSLT >> 4] = BPF_JSGT
7470 	};
7471 	return opcode_flip[opcode >> 4];
7472 }
7473 
7474 static int is_pkt_ptr_branch_taken(struct bpf_reg_state *dst_reg,
7475 				   struct bpf_reg_state *src_reg,
7476 				   u8 opcode)
7477 {
7478 	struct bpf_reg_state *pkt;
7479 
7480 	if (src_reg->type == PTR_TO_PACKET_END) {
7481 		pkt = dst_reg;
7482 	} else if (dst_reg->type == PTR_TO_PACKET_END) {
7483 		pkt = src_reg;
7484 		opcode = flip_opcode(opcode);
7485 	} else {
7486 		return -1;
7487 	}
7488 
7489 	if (pkt->range >= 0)
7490 		return -1;
7491 
7492 	switch (opcode) {
7493 	case BPF_JLE:
7494 		/* pkt <= pkt_end */
7495 		fallthrough;
7496 	case BPF_JGT:
7497 		/* pkt > pkt_end */
7498 		if (pkt->range == BEYOND_PKT_END)
7499 			/* pkt has at least one extra byte beyond pkt_end */
7500 			return opcode == BPF_JGT;
7501 		break;
7502 	case BPF_JLT:
7503 		/* pkt < pkt_end */
7504 		fallthrough;
7505 	case BPF_JGE:
7506 		/* pkt >= pkt_end */
7507 		if (pkt->range == BEYOND_PKT_END || pkt->range == AT_PKT_END)
7508 			return opcode == BPF_JGE;
7509 		break;
7510 	}
7511 	return -1;
7512 }
7513 
7514 /* Adjusts the register min/max values in the case that the dst_reg is the
7515  * variable register that we are working on, and src_reg is a constant or we're
7516  * simply doing a BPF_K check.
7517  * In JEQ/JNE cases we also adjust the var_off values.
7518  */
7519 static void reg_set_min_max(struct bpf_reg_state *true_reg,
7520 			    struct bpf_reg_state *false_reg,
7521 			    u64 val, u32 val32,
7522 			    u8 opcode, bool is_jmp32)
7523 {
7524 	struct tnum false_32off = tnum_subreg(false_reg->var_off);
7525 	struct tnum false_64off = false_reg->var_off;
7526 	struct tnum true_32off = tnum_subreg(true_reg->var_off);
7527 	struct tnum true_64off = true_reg->var_off;
7528 	s64 sval = (s64)val;
7529 	s32 sval32 = (s32)val32;
7530 
7531 	/* If the dst_reg is a pointer, we can't learn anything about its
7532 	 * variable offset from the compare (unless src_reg were a pointer into
7533 	 * the same object, but we don't bother with that).
7534 	 * Since false_reg and true_reg have the same type by construction, we
7535 	 * only need to check one of them for pointerness.
7536 	 */
7537 	if (__is_pointer_value(false, false_reg))
7538 		return;
7539 
7540 	switch (opcode) {
7541 	/* JEQ/JNE comparison doesn't change the register equivalence.
7542 	 *
7543 	 * r1 = r2;
7544 	 * if (r1 == 42) goto label;
7545 	 * ...
7546 	 * label: // here both r1 and r2 are known to be 42.
7547 	 *
7548 	 * Hence when marking a register as known, preserve its ID.
7549 	 */
7550 	case BPF_JEQ:
7551 		if (is_jmp32) {
7552 			__mark_reg32_known(true_reg, val32);
7553 			true_32off = tnum_subreg(true_reg->var_off);
7554 		} else {
7555 			___mark_reg_known(true_reg, val);
7556 			true_64off = true_reg->var_off;
7557 		}
7558 		break;
7559 	case BPF_JNE:
7560 		if (is_jmp32) {
7561 			__mark_reg32_known(false_reg, val32);
7562 			false_32off = tnum_subreg(false_reg->var_off);
7563 		} else {
7564 			___mark_reg_known(false_reg, val);
7565 			false_64off = false_reg->var_off;
7566 		}
7567 		break;
7568 	case BPF_JSET:
7569 		if (is_jmp32) {
7570 			false_32off = tnum_and(false_32off, tnum_const(~val32));
7571 			if (is_power_of_2(val32))
7572 				true_32off = tnum_or(true_32off,
7573 						     tnum_const(val32));
7574 		} else {
7575 			false_64off = tnum_and(false_64off, tnum_const(~val));
7576 			if (is_power_of_2(val))
7577 				true_64off = tnum_or(true_64off,
7578 						     tnum_const(val));
7579 		}
7580 		break;
7581 	case BPF_JGE:
7582 	case BPF_JGT:
7583 	{
7584 		if (is_jmp32) {
7585 			u32 false_umax = opcode == BPF_JGT ? val32  : val32 - 1;
7586 			u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7587 
7588 			false_reg->u32_max_value = min(false_reg->u32_max_value,
7589 						       false_umax);
7590 			true_reg->u32_min_value = max(true_reg->u32_min_value,
7591 						      true_umin);
7592 		} else {
7593 			u64 false_umax = opcode == BPF_JGT ? val    : val - 1;
7594 			u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7595 
7596 			false_reg->umax_value = min(false_reg->umax_value, false_umax);
7597 			true_reg->umin_value = max(true_reg->umin_value, true_umin);
7598 		}
7599 		break;
7600 	}
7601 	case BPF_JSGE:
7602 	case BPF_JSGT:
7603 	{
7604 		if (is_jmp32) {
7605 			s32 false_smax = opcode == BPF_JSGT ? sval32    : sval32 - 1;
7606 			s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7607 
7608 			false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7609 			true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7610 		} else {
7611 			s64 false_smax = opcode == BPF_JSGT ? sval    : sval - 1;
7612 			s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7613 
7614 			false_reg->smax_value = min(false_reg->smax_value, false_smax);
7615 			true_reg->smin_value = max(true_reg->smin_value, true_smin);
7616 		}
7617 		break;
7618 	}
7619 	case BPF_JLE:
7620 	case BPF_JLT:
7621 	{
7622 		if (is_jmp32) {
7623 			u32 false_umin = opcode == BPF_JLT ? val32  : val32 + 1;
7624 			u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7625 
7626 			false_reg->u32_min_value = max(false_reg->u32_min_value,
7627 						       false_umin);
7628 			true_reg->u32_max_value = min(true_reg->u32_max_value,
7629 						      true_umax);
7630 		} else {
7631 			u64 false_umin = opcode == BPF_JLT ? val    : val + 1;
7632 			u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7633 
7634 			false_reg->umin_value = max(false_reg->umin_value, false_umin);
7635 			true_reg->umax_value = min(true_reg->umax_value, true_umax);
7636 		}
7637 		break;
7638 	}
7639 	case BPF_JSLE:
7640 	case BPF_JSLT:
7641 	{
7642 		if (is_jmp32) {
7643 			s32 false_smin = opcode == BPF_JSLT ? sval32    : sval32 + 1;
7644 			s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7645 
7646 			false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7647 			true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7648 		} else {
7649 			s64 false_smin = opcode == BPF_JSLT ? sval    : sval + 1;
7650 			s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7651 
7652 			false_reg->smin_value = max(false_reg->smin_value, false_smin);
7653 			true_reg->smax_value = min(true_reg->smax_value, true_smax);
7654 		}
7655 		break;
7656 	}
7657 	default:
7658 		return;
7659 	}
7660 
7661 	if (is_jmp32) {
7662 		false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off),
7663 					     tnum_subreg(false_32off));
7664 		true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off),
7665 					    tnum_subreg(true_32off));
7666 		__reg_combine_32_into_64(false_reg);
7667 		__reg_combine_32_into_64(true_reg);
7668 	} else {
7669 		false_reg->var_off = false_64off;
7670 		true_reg->var_off = true_64off;
7671 		__reg_combine_64_into_32(false_reg);
7672 		__reg_combine_64_into_32(true_reg);
7673 	}
7674 }
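/* Worked example for the refinement above (illustrative only): for
 * "if r1 > 10 goto L" with r1 previously in unsigned range [0, 100],
 * BPF_JGT yields true_umin = 11 and false_umax = 10, so r1 is narrowed
 * to [11, 100] in the taken branch and to [0, 10] in the fall-through
 * branch; the final combine step then propagates the new 64-bit bounds
 * into the 32-bit counterparts (or vice versa for jmp32).
 */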
7675 
7676 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
7677  * the variable reg.
7678  */
7679 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
7680 				struct bpf_reg_state *false_reg,
7681 				u64 val, u32 val32,
7682 				u8 opcode, bool is_jmp32)
7683 {
7684 	opcode = flip_opcode(opcode);
7685 	/* This uses zero as "not present in table"; luckily the zero opcode,
7686 	 * BPF_JA, can't get here.
7687 	 */
7688 	if (opcode)
7689 		reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7690 }
7691 
7692 /* Regs are known to be equal, so intersect their min/max/var_off */
7693 static void __reg_combine_min_max(struct bpf_reg_state *src_reg,
7694 				  struct bpf_reg_state *dst_reg)
7695 {
7696 	src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value,
7697 							dst_reg->umin_value);
7698 	src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value,
7699 							dst_reg->umax_value);
7700 	src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value,
7701 							dst_reg->smin_value);
7702 	src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value,
7703 							dst_reg->smax_value);
7704 	src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
7705 							     dst_reg->var_off);
7706 	reg_bounds_sync(src_reg);
7707 	reg_bounds_sync(dst_reg);
7708 }
7709 
7710 static void reg_combine_min_max(struct bpf_reg_state *true_src,
7711 				struct bpf_reg_state *true_dst,
7712 				struct bpf_reg_state *false_src,
7713 				struct bpf_reg_state *false_dst,
7714 				u8 opcode)
7715 {
7716 	switch (opcode) {
7717 	case BPF_JEQ:
7718 		__reg_combine_min_max(true_src, true_dst);
7719 		break;
7720 	case BPF_JNE:
7721 		__reg_combine_min_max(false_src, false_dst);
7722 		break;
7723 	}
7724 }
7725 
7726 static void mark_ptr_or_null_reg(struct bpf_func_state *state,
7727 				 struct bpf_reg_state *reg, u32 id,
7728 				 bool is_null)
7729 {
7730 	if (reg_type_may_be_null(reg->type) && reg->id == id &&
7731 	    !WARN_ON_ONCE(!reg->id)) {
7732 		if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7733 				 !tnum_equals_const(reg->var_off, 0) ||
7734 				 reg->off)) {
7735 			/* Old offset (both fixed and variable parts) should
7736 			 * have been known-zero, because we don't allow pointer
7737 			 * arithmetic on pointers that might be NULL. If we
7738 			 * see this happening, don't convert the register.
7739 			 */
7740 			return;
7741 		}
7742 		if (is_null) {
7743 			reg->type = SCALAR_VALUE;
7744 		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
7745 			const struct bpf_map *map = reg->map_ptr;
7746 
7747 			if (map->inner_map_meta) {
7748 				reg->type = CONST_PTR_TO_MAP;
7749 				reg->map_ptr = map->inner_map_meta;
7750 			} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7751 				reg->type = PTR_TO_XDP_SOCK;
7752 			} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
7753 				   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7754 				reg->type = PTR_TO_SOCKET;
7755 			} else {
7756 				reg->type = PTR_TO_MAP_VALUE;
7757 			}
7758 		} else if (reg->type == PTR_TO_SOCKET_OR_NULL) {
7759 			reg->type = PTR_TO_SOCKET;
7760 		} else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) {
7761 			reg->type = PTR_TO_SOCK_COMMON;
7762 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
7763 			reg->type = PTR_TO_TCP_SOCK;
7764 		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
7765 			reg->type = PTR_TO_BTF_ID;
7766 		} else if (reg->type == PTR_TO_MEM_OR_NULL) {
7767 			reg->type = PTR_TO_MEM;
7768 		} else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) {
7769 			reg->type = PTR_TO_RDONLY_BUF;
7770 		} else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) {
7771 			reg->type = PTR_TO_RDWR_BUF;
7772 		}
7773 		if (is_null) {
7774 			/* We don't need id and ref_obj_id from this point
7775 			 * onwards anymore, thus we should better reset it,
7776 			 * so that state pruning has chances to take effect.
7777 			 */
7778 			reg->id = 0;
7779 			reg->ref_obj_id = 0;
7780 		} else if (!reg_may_point_to_spin_lock(reg)) {
7781 			/* For not-NULL ptr, reg->ref_obj_id will be reset
7782 			 * in release_reference().
7783 			 *
7784 			 * reg->id is still used by spin_lock ptr. Other
7785 			 * than spin_lock ptr type, reg->id can be reset.
7786 			 */
7787 			reg->id = 0;
7788 		}
7789 	}
7790 }
7791 
7792 /* The logic is similar to find_good_pkt_pointers(), both could eventually
7793  * be folded together at some point.
7794  */
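/* Illustrative example: if regs[regno] holds an acquired reference, e.g. a
 * socket returned by bpf_sk_lookup_tcp(), then taking the "== NULL" branch
 * also drops the matching reference state, since a NULL result never needs
 * a corresponding bpf_sk_release().
 */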
7795 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
7796 				  bool is_null)
7797 {
7798 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
7799 	struct bpf_reg_state *regs = state->regs, *reg;
7800 	u32 ref_obj_id = regs[regno].ref_obj_id;
7801 	u32 id = regs[regno].id;
7802 
7803 	if (ref_obj_id && ref_obj_id == id && is_null)
7804 		/* regs[regno] is in the " == NULL" branch.
7805 		 * No one could have freed the reference state before
7806 		 * doing the NULL check.
7807 		 */
7808 		WARN_ON_ONCE(release_reference_state(state, id));
7809 
7810 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7811 		mark_ptr_or_null_reg(state, reg, id, is_null);
7812 	}));
7813 }
7814 
7815 static bool try_match_pkt_pointers(const struct bpf_insn *insn,
7816 				   struct bpf_reg_state *dst_reg,
7817 				   struct bpf_reg_state *src_reg,
7818 				   struct bpf_verifier_state *this_branch,
7819 				   struct bpf_verifier_state *other_branch)
7820 {
7821 	if (BPF_SRC(insn->code) != BPF_X)
7822 		return false;
7823 
7824 	/* Pointers are always 64-bit. */
7825 	if (BPF_CLASS(insn->code) == BPF_JMP32)
7826 		return false;
7827 
7828 	switch (BPF_OP(insn->code)) {
7829 	case BPF_JGT:
7830 		if ((dst_reg->type == PTR_TO_PACKET &&
7831 		     src_reg->type == PTR_TO_PACKET_END) ||
7832 		    (dst_reg->type == PTR_TO_PACKET_META &&
7833 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7834 			/* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7835 			find_good_pkt_pointers(this_branch, dst_reg,
7836 					       dst_reg->type, false);
7837 			mark_pkt_end(other_branch, insn->dst_reg, true);
7838 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
7839 			    src_reg->type == PTR_TO_PACKET) ||
7840 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7841 			    src_reg->type == PTR_TO_PACKET_META)) {
7842 			/* pkt_end > pkt_data', pkt_data > pkt_meta' */
7843 			find_good_pkt_pointers(other_branch, src_reg,
7844 					       src_reg->type, true);
7845 			mark_pkt_end(this_branch, insn->src_reg, false);
7846 		} else {
7847 			return false;
7848 		}
7849 		break;
7850 	case BPF_JLT:
7851 		if ((dst_reg->type == PTR_TO_PACKET &&
7852 		     src_reg->type == PTR_TO_PACKET_END) ||
7853 		    (dst_reg->type == PTR_TO_PACKET_META &&
7854 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7855 			/* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7856 			find_good_pkt_pointers(other_branch, dst_reg,
7857 					       dst_reg->type, true);
7858 			mark_pkt_end(this_branch, insn->dst_reg, false);
7859 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
7860 			    src_reg->type == PTR_TO_PACKET) ||
7861 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7862 			    src_reg->type == PTR_TO_PACKET_META)) {
7863 			/* pkt_end < pkt_data', pkt_data < pkt_meta' */
7864 			find_good_pkt_pointers(this_branch, src_reg,
7865 					       src_reg->type, false);
7866 			mark_pkt_end(other_branch, insn->src_reg, true);
7867 		} else {
7868 			return false;
7869 		}
7870 		break;
7871 	case BPF_JGE:
7872 		if ((dst_reg->type == PTR_TO_PACKET &&
7873 		     src_reg->type == PTR_TO_PACKET_END) ||
7874 		    (dst_reg->type == PTR_TO_PACKET_META &&
7875 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7876 			/* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7877 			find_good_pkt_pointers(this_branch, dst_reg,
7878 					       dst_reg->type, true);
7879 			mark_pkt_end(other_branch, insn->dst_reg, false);
7880 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
7881 			    src_reg->type == PTR_TO_PACKET) ||
7882 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7883 			    src_reg->type == PTR_TO_PACKET_META)) {
7884 			/* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7885 			find_good_pkt_pointers(other_branch, src_reg,
7886 					       src_reg->type, false);
7887 			mark_pkt_end(this_branch, insn->src_reg, true);
7888 		} else {
7889 			return false;
7890 		}
7891 		break;
7892 	case BPF_JLE:
7893 		if ((dst_reg->type == PTR_TO_PACKET &&
7894 		     src_reg->type == PTR_TO_PACKET_END) ||
7895 		    (dst_reg->type == PTR_TO_PACKET_META &&
7896 		     reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7897 			/* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7898 			find_good_pkt_pointers(other_branch, dst_reg,
7899 					       dst_reg->type, false);
7900 			mark_pkt_end(this_branch, insn->dst_reg, true);
7901 		} else if ((dst_reg->type == PTR_TO_PACKET_END &&
7902 			    src_reg->type == PTR_TO_PACKET) ||
7903 			   (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) &&
7904 			    src_reg->type == PTR_TO_PACKET_META)) {
7905 			/* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7906 			find_good_pkt_pointers(this_branch, src_reg,
7907 					       src_reg->type, true);
7908 			mark_pkt_end(other_branch, insn->src_reg, false);
7909 		} else {
7910 			return false;
7911 		}
7912 		break;
7913 	default:
7914 		return false;
7915 	}
7916 
7917 	return true;
7918 }
7919 
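/* Propagate the bounds of known_reg to every scalar register sharing its id.
 * Illustrative example: after "r3 = r1" both registers carry the same id, so
 * learning that r1 == 7 on one branch also pins r3 to 7 in that branch.
 */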
7920 static void find_equal_scalars(struct bpf_verifier_state *vstate,
7921 			       struct bpf_reg_state *known_reg)
7922 {
7923 	struct bpf_func_state *state;
7924 	struct bpf_reg_state *reg;
7925 
7926 	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
7927 		if (reg->type == SCALAR_VALUE && reg->id == known_reg->id)
7928 			*reg = *known_reg;
7929 	}));
7930 }
7931 
7932 static int check_cond_jmp_op(struct bpf_verifier_env *env,
7933 			     struct bpf_insn *insn, int *insn_idx)
7934 {
7935 	struct bpf_verifier_state *this_branch = env->cur_state;
7936 	struct bpf_verifier_state *other_branch;
7937 	struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7938 	struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7939 	u8 opcode = BPF_OP(insn->code);
7940 	bool is_jmp32;
7941 	int pred = -1;
7942 	int err;
7943 
7944 	/* Only conditional jumps are expected to reach here. */
7945 	if (opcode == BPF_JA || opcode > BPF_JSLE) {
7946 		verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7947 		return -EINVAL;
7948 	}
7949 
7950 	if (BPF_SRC(insn->code) == BPF_X) {
7951 		if (insn->imm != 0) {
7952 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7953 			return -EINVAL;
7954 		}
7955 
7956 		/* check src1 operand */
7957 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
7958 		if (err)
7959 			return err;
7960 
7961 		if (is_pointer_value(env, insn->src_reg)) {
7962 			verbose(env, "R%d pointer comparison prohibited\n",
7963 				insn->src_reg);
7964 			return -EACCES;
7965 		}
7966 		src_reg = &regs[insn->src_reg];
7967 	} else {
7968 		if (insn->src_reg != BPF_REG_0) {
7969 			verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7970 			return -EINVAL;
7971 		}
7972 	}
7973 
7974 	/* check src2 operand */
7975 	err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7976 	if (err)
7977 		return err;
7978 
7979 	dst_reg = &regs[insn->dst_reg];
7980 	is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7981 
7982 	if (BPF_SRC(insn->code) == BPF_K) {
7983 		pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7984 	} else if (src_reg->type == SCALAR_VALUE &&
7985 		   is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7986 		pred = is_branch_taken(dst_reg,
7987 				       tnum_subreg(src_reg->var_off).value,
7988 				       opcode,
7989 				       is_jmp32);
7990 	} else if (src_reg->type == SCALAR_VALUE &&
7991 		   !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7992 		pred = is_branch_taken(dst_reg,
7993 				       src_reg->var_off.value,
7994 				       opcode,
7995 				       is_jmp32);
7996 	} else if (reg_is_pkt_pointer_any(dst_reg) &&
7997 		   reg_is_pkt_pointer_any(src_reg) &&
7998 		   !is_jmp32) {
7999 		pred = is_pkt_ptr_branch_taken(dst_reg, src_reg, opcode);
8000 	}
8001 
8002 	if (pred >= 0) {
8003 		/* If we get here with a dst_reg pointer type it is because
8004 		 * above is_branch_taken() special cased the 0 comparison.
8005 		 */
8006 		if (!__is_pointer_value(false, dst_reg))
8007 			err = mark_chain_precision(env, insn->dst_reg);
8008 		if (BPF_SRC(insn->code) == BPF_X && !err &&
8009 		    !__is_pointer_value(false, src_reg))
8010 			err = mark_chain_precision(env, insn->src_reg);
8011 		if (err)
8012 			return err;
8013 	}
8014 
8015 	if (pred == 1) {
8016 		/* Only follow the goto, ignore fall-through. If needed, push
8017 		 * the fall-through branch for simulation under speculative
8018 		 * execution.
8019 		 */
8020 		if (!env->bypass_spec_v1 &&
8021 		    !sanitize_speculative_path(env, insn, *insn_idx + 1,
8022 					       *insn_idx))
8023 			return -EFAULT;
8024 		*insn_idx += insn->off;
8025 		return 0;
8026 	} else if (pred == 0) {
8027 		/* Only follow the fall-through branch, since that's where the
8028 		 * program will go. If needed, push the goto branch for
8029 		 * simulation under speculative execution.
8030 		 */
8031 		if (!env->bypass_spec_v1 &&
8032 		    !sanitize_speculative_path(env, insn,
8033 					       *insn_idx + insn->off + 1,
8034 					       *insn_idx))
8035 			return -EFAULT;
8036 		return 0;
8037 	}
8038 
8039 	other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx,
8040 				  false);
8041 	if (!other_branch)
8042 		return -EFAULT;
8043 	other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
8044 
8045 	/* detect if we are comparing against a constant value so we can adjust
8046 	 * our min/max values for our dst register.
8047 	 * this is only legit if both are scalars (or pointers to the same
8048 	 * object, I suppose, but we don't support that right now), because
8049 	 * otherwise the different base pointers mean the offsets aren't
8050 	 * comparable.
8051 	 */
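	/* Illustrative example: for "if r1 > r2 goto ..." where r2 is known to
	 * be the constant 7, the taken (goto) branch learns r1 > 7 while the
	 * fall-through branch learns r1 <= 7. With neither side constant, only
	 * the BPF_JEQ/BPF_JNE case below can combine knowledge.
	 */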
8052 	if (BPF_SRC(insn->code) == BPF_X) {
8053 		struct bpf_reg_state *src_reg = &regs[insn->src_reg];
8054 
8055 		if (dst_reg->type == SCALAR_VALUE &&
8056 		    src_reg->type == SCALAR_VALUE) {
8057 			if (tnum_is_const(src_reg->var_off) ||
8058 			    (is_jmp32 &&
8059 			     tnum_is_const(tnum_subreg(src_reg->var_off))))
8060 				reg_set_min_max(&other_branch_regs[insn->dst_reg],
8061 						dst_reg,
8062 						src_reg->var_off.value,
8063 						tnum_subreg(src_reg->var_off).value,
8064 						opcode, is_jmp32);
8065 			else if (tnum_is_const(dst_reg->var_off) ||
8066 				 (is_jmp32 &&
8067 				  tnum_is_const(tnum_subreg(dst_reg->var_off))))
8068 				reg_set_min_max_inv(&other_branch_regs[insn->src_reg],
8069 						    src_reg,
8070 						    dst_reg->var_off.value,
8071 						    tnum_subreg(dst_reg->var_off).value,
8072 						    opcode, is_jmp32);
8073 			else if (!is_jmp32 &&
8074 				 (opcode == BPF_JEQ || opcode == BPF_JNE))
8075 				/* Comparing for equality, we can combine knowledge */
8076 				reg_combine_min_max(&other_branch_regs[insn->src_reg],
8077 						    &other_branch_regs[insn->dst_reg],
8078 						    src_reg, dst_reg, opcode);
8079 			if (src_reg->id &&
8080 			    !WARN_ON_ONCE(src_reg->id != other_branch_regs[insn->src_reg].id)) {
8081 				find_equal_scalars(this_branch, src_reg);
8082 				find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
8083 			}
8084 
8085 		}
8086 	} else if (dst_reg->type == SCALAR_VALUE) {
8087 		reg_set_min_max(&other_branch_regs[insn->dst_reg],
8088 					dst_reg, insn->imm, (u32)insn->imm,
8089 					opcode, is_jmp32);
8090 	}
8091 
8092 	if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
8093 	    !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
8094 		find_equal_scalars(this_branch, dst_reg);
8095 		find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
8096 	}
8097 
8098 	/* detect if R == 0 where R is returned from bpf_map_lookup_elem().
8099 	 * NOTE: these optimizations below are related with pointer comparison
8100 	 *       which will never be JMP32.
8101 	 */
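	/* Illustrative example:
	 *   r0 = bpf_map_lookup_elem(...)  ; r0 is PTR_TO_MAP_VALUE_OR_NULL
	 *   if r0 == 0 goto drop
	 *   ...                            ; fall-through: r0 is PTR_TO_MAP_VALUE
	 * while in the 'drop' branch r0 is downgraded to a SCALAR_VALUE (NULL).
	 */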
8102 	if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K &&
8103 	    insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
8104 	    reg_type_may_be_null(dst_reg->type)) {
8105 		/* Mark all identical registers in each branch as either
8106 		 * safe or unknown depending on the R == 0 or R != 0 conditional.
8107 		 */
8108 		mark_ptr_or_null_regs(this_branch, insn->dst_reg,
8109 				      opcode == BPF_JNE);
8110 		mark_ptr_or_null_regs(other_branch, insn->dst_reg,
8111 				      opcode == BPF_JEQ);
8112 	} else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg],
8113 					   this_branch, other_branch) &&
8114 		   is_pointer_value(env, insn->dst_reg)) {
8115 		verbose(env, "R%d pointer comparison prohibited\n",
8116 			insn->dst_reg);
8117 		return -EACCES;
8118 	}
8119 	if (env->log.level & BPF_LOG_LEVEL)
8120 		print_verifier_state(env, this_branch->frame[this_branch->curframe]);
8121 	return 0;
8122 }
8123 
8124 /* verify BPF_LD_IMM64 instruction */
8125 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
8126 {
8127 	struct bpf_insn_aux_data *aux = cur_aux(env);
8128 	struct bpf_reg_state *regs = cur_regs(env);
8129 	struct bpf_reg_state *dst_reg;
8130 	struct bpf_map *map;
8131 	int err;
8132 
8133 	if (BPF_SIZE(insn->code) != BPF_DW) {
8134 		verbose(env, "invalid BPF_LD_IMM insn\n");
8135 		return -EINVAL;
8136 	}
8137 	if (insn->off != 0) {
8138 		verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
8139 		return -EINVAL;
8140 	}
8141 
8142 	err = check_reg_arg(env, insn->dst_reg, DST_OP);
8143 	if (err)
8144 		return err;
8145 
8146 	dst_reg = &regs[insn->dst_reg];
8147 	if (insn->src_reg == 0) {
8148 		u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
8149 
8150 		dst_reg->type = SCALAR_VALUE;
8151 		__mark_reg_known(&regs[insn->dst_reg], imm);
8152 		return 0;
8153 	}
8154 
8155 	if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8156 		mark_reg_known_zero(env, regs, insn->dst_reg);
8157 
8158 		dst_reg->type = aux->btf_var.reg_type;
8159 		switch (dst_reg->type) {
8160 		case PTR_TO_MEM:
8161 			dst_reg->mem_size = aux->btf_var.mem_size;
8162 			break;
8163 		case PTR_TO_BTF_ID:
8164 		case PTR_TO_PERCPU_BTF_ID:
8165 			dst_reg->btf_id = aux->btf_var.btf_id;
8166 			break;
8167 		default:
8168 			verbose(env, "bpf verifier is misconfigured\n");
8169 			return -EFAULT;
8170 		}
8171 		return 0;
8172 	}
8173 
8174 	map = env->used_maps[aux->map_index];
8175 	mark_reg_known_zero(env, regs, insn->dst_reg);
8176 	dst_reg->map_ptr = map;
8177 
8178 	if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8179 		dst_reg->type = PTR_TO_MAP_VALUE;
8180 		dst_reg->off = aux->map_off;
8181 		if (map_value_has_spin_lock(map))
8182 			dst_reg->id = ++env->id_gen;
8183 	} else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8184 		dst_reg->type = CONST_PTR_TO_MAP;
8185 	} else {
8186 		verbose(env, "bpf verifier is misconfigured\n");
8187 		return -EINVAL;
8188 	}
8189 
8190 	return 0;
8191 }
8192 
8193 static bool may_access_skb(enum bpf_prog_type type)
8194 {
8195 	switch (type) {
8196 	case BPF_PROG_TYPE_SOCKET_FILTER:
8197 	case BPF_PROG_TYPE_SCHED_CLS:
8198 	case BPF_PROG_TYPE_SCHED_ACT:
8199 		return true;
8200 	default:
8201 		return false;
8202 	}
8203 }
8204 
8205 /* verify safety of LD_ABS|LD_IND instructions:
8206  * - they can only appear in the programs where ctx == skb
8207  * - since they are wrappers of function calls, they scratch R1-R5 registers,
8208  *   preserve R6-R9, and store return value into R0
8209  *
8210  * Implicit input:
8211  *   ctx == skb == R6 == CTX
8212  *
8213  * Explicit input:
8214  *   SRC == any register
8215  *   IMM == 32-bit immediate
8216  *
8217  * Output:
8218  *   R0 - 8/16/32-bit skb data converted to cpu endianness
8219  */
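/* Illustrative example, assuming a socket filter on a raw packet socket: the
 * single insn BPF_LD | BPF_H | BPF_ABS with imm == 12 leaves the frame's
 * EtherType in R0 (converted to host order), scratches R1-R5 and still
 * requires R6 to hold the skb context.
 */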
8220 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8221 {
8222 	struct bpf_reg_state *regs = cur_regs(env);
8223 	static const int ctx_reg = BPF_REG_6;
8224 	u8 mode = BPF_MODE(insn->code);
8225 	int i, err;
8226 
8227 	if (!may_access_skb(resolve_prog_type(env->prog))) {
8228 		verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8229 		return -EINVAL;
8230 	}
8231 
8232 	if (!env->ops->gen_ld_abs) {
8233 		verbose(env, "bpf verifier is misconfigured\n");
8234 		return -EINVAL;
8235 	}
8236 
8237 	if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
8238 	    BPF_SIZE(insn->code) == BPF_DW ||
8239 	    (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8240 		verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8241 		return -EINVAL;
8242 	}
8243 
8244 	/* check whether implicit source operand (register R6) is readable */
8245 	err = check_reg_arg(env, ctx_reg, SRC_OP);
8246 	if (err)
8247 		return err;
8248 
8249 	/* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8250 	 * gen_ld_abs() may terminate the program at runtime, leading to
8251 	 * reference leak.
8252 	 */
8253 	err = check_reference_leak(env);
8254 	if (err) {
8255 		verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8256 		return err;
8257 	}
8258 
8259 	if (env->cur_state->active_spin_lock) {
8260 		verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8261 		return -EINVAL;
8262 	}
8263 
8264 	if (regs[ctx_reg].type != PTR_TO_CTX) {
8265 		verbose(env,
8266 			"at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8267 		return -EINVAL;
8268 	}
8269 
8270 	if (mode == BPF_IND) {
8271 		/* check explicit source operand */
8272 		err = check_reg_arg(env, insn->src_reg, SRC_OP);
8273 		if (err)
8274 			return err;
8275 	}
8276 
8277 	err = check_ctx_reg(env, &regs[ctx_reg], ctx_reg);
8278 	if (err < 0)
8279 		return err;
8280 
8281 	/* reset caller saved regs to unreadable */
8282 	for (i = 0; i < CALLER_SAVED_REGS; i++) {
8283 		mark_reg_not_init(env, regs, caller_saved[i]);
8284 		check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8285 	}
8286 
8287 	/* mark destination R0 register as readable, since it contains
8288 	 * the value fetched from the packet.
8289 	 * Already marked as written above.
8290 	 */
8291 	mark_reg_unknown(env, regs, BPF_REG_0);
8292 	/* ld_abs load up to 32-bit skb data. */
8293 	regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8294 	return 0;
8295 }
8296 
8297 static int check_return_code(struct bpf_verifier_env *env)
8298 {
8299 	struct tnum enforce_attach_type_range = tnum_unknown;
8300 	const struct bpf_prog *prog = env->prog;
8301 	struct bpf_reg_state *reg;
8302 	struct tnum range = tnum_range(0, 1);
8303 	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8304 	int err;
8305 	const bool is_subprog = env->cur_state->frame[0]->subprogno;
8306 
8307 	/* LSM and struct_ops func-ptr's return type could be "void" */
8308 	if (!is_subprog &&
8309 	    (prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
8310 	     prog_type == BPF_PROG_TYPE_LSM) &&
8311 	    !prog->aux->attach_func_proto->type)
8312 		return 0;
8313 
8314 	/* eBPF calling convention is such that R0 is used
8315 	 * to return the value from eBPF program.
8316 	 * Make sure that it's readable at this time
8317 	 * of bpf_exit, which means that program wrote
8318 	 * something into it earlier
8319 	 */
8320 	err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8321 	if (err)
8322 		return err;
8323 
8324 	if (is_pointer_value(env, BPF_REG_0)) {
8325 		verbose(env, "R0 leaks addr as return value\n");
8326 		return -EACCES;
8327 	}
8328 
8329 	reg = cur_regs(env) + BPF_REG_0;
8330 	if (is_subprog) {
8331 		if (reg->type != SCALAR_VALUE) {
8332 			verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8333 				reg_type_str[reg->type]);
8334 			return -EINVAL;
8335 		}
8336 		return 0;
8337 	}
8338 
8339 	switch (prog_type) {
8340 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8341 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8342 		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8343 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8344 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8345 		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8346 		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
8347 			range = tnum_range(1, 1);
8348 		break;
8349 	case BPF_PROG_TYPE_CGROUP_SKB:
8350 		if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8351 			range = tnum_range(0, 3);
8352 			enforce_attach_type_range = tnum_range(2, 3);
8353 		}
8354 		break;
8355 	case BPF_PROG_TYPE_CGROUP_SOCK:
8356 	case BPF_PROG_TYPE_SOCK_OPS:
8357 	case BPF_PROG_TYPE_CGROUP_DEVICE:
8358 	case BPF_PROG_TYPE_CGROUP_SYSCTL:
8359 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8360 		break;
8361 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
8362 		if (!env->prog->aux->attach_btf_id)
8363 			return 0;
8364 		range = tnum_const(0);
8365 		break;
8366 	case BPF_PROG_TYPE_TRACING:
8367 		switch (env->prog->expected_attach_type) {
8368 		case BPF_TRACE_FENTRY:
8369 		case BPF_TRACE_FEXIT:
8370 			range = tnum_const(0);
8371 			break;
8372 		case BPF_TRACE_RAW_TP:
8373 		case BPF_MODIFY_RETURN:
8374 			return 0;
8375 		case BPF_TRACE_ITER:
8376 			break;
8377 		default:
8378 			return -ENOTSUPP;
8379 		}
8380 		break;
8381 	case BPF_PROG_TYPE_SK_LOOKUP:
8382 		range = tnum_range(SK_DROP, SK_PASS);
8383 		break;
8384 	case BPF_PROG_TYPE_EXT:
8385 		/* freplace program can return anything as its return value
8386 		 * depends on the to-be-replaced kernel func or bpf program.
8387 		 */
8388 	default:
8389 		return 0;
8390 	}
8391 
8392 	if (reg->type != SCALAR_VALUE) {
8393 		verbose(env, "At program exit the register R0 is not a known value (%s)\n",
8394 			reg_type_str[reg->type]);
8395 		return -EINVAL;
8396 	}
8397 
8398 	if (!tnum_in(range, reg->var_off)) {
8399 		char tn_buf[48];
8400 
8401 		verbose(env, "At program exit the register R0 ");
8402 		if (!tnum_is_unknown(reg->var_off)) {
8403 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8404 			verbose(env, "has value %s", tn_buf);
8405 		} else {
8406 			verbose(env, "has unknown scalar value");
8407 		}
8408 		tnum_strn(tn_buf, sizeof(tn_buf), range);
8409 		verbose(env, " should have been in %s\n", tn_buf);
8410 		return -EINVAL;
8411 	}
8412 
8413 	if (!tnum_is_unknown(enforce_attach_type_range) &&
8414 	    tnum_in(enforce_attach_type_range, reg->var_off))
8415 		env->prog->enforce_expected_attach_type = 1;
8416 	return 0;
8417 }
8418 
8419 /* non-recursive DFS pseudo code
8420  * 1  procedure DFS-iterative(G,v):
8421  * 2      label v as discovered
8422  * 3      let S be a stack
8423  * 4      S.push(v)
8424  * 5      while S is not empty
8425  * 6            t <- S.pop()
8426  * 7            if t is what we're looking for:
8427  * 8                return t
8428  * 9            for all edges e in G.adjacentEdges(t) do
8429  * 10               if edge e is already labelled
8430  * 11                   continue with the next edge
8431  * 12               w <- G.adjacentVertex(t,e)
8432  * 13               if vertex w is not discovered and not explored
8433  * 14                   label e as tree-edge
8434  * 15                   label w as discovered
8435  * 16                   S.push(w)
8436  * 17                   continue at 5
8437  * 18               else if vertex w is discovered
8438  * 19                   label e as back-edge
8439  * 20               else
8440  * 21                   // vertex w is explored
8441  * 22                   label e as forward- or cross-edge
8442  * 23           label t as explored
8443  * 24           S.pop()
8444  *
8445  * convention:
8446  * 0x10 - discovered
8447  * 0x11 - discovered and fall-through edge labelled
8448  * 0x12 - discovered and fall-through and branch edges labelled
8449  * 0x20 - explored
8450  */
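/* Illustrative walk-through: for a conditional jump at insn t, insn_state[t]
 * moves from 0x10 to 0x11 once its fall-through edge is pushed, then to 0x12
 * once its branch edge is pushed, and finally to 0x20 when t is popped as
 * explored. Reaching a vertex that is still DISCOVERED means a back-edge,
 * i.e. a loop.
 */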
8451 
8452 enum {
8453 	DISCOVERED = 0x10,
8454 	EXPLORED = 0x20,
8455 	FALLTHROUGH = 1,
8456 	BRANCH = 2,
8457 };
8458 
8459 static u32 state_htab_size(struct bpf_verifier_env *env)
8460 {
8461 	return env->prog->len;
8462 }
8463 
8464 static struct bpf_verifier_state_list **explored_state(
8465 					struct bpf_verifier_env *env,
8466 					int idx)
8467 {
8468 	struct bpf_verifier_state *cur = env->cur_state;
8469 	struct bpf_func_state *state = cur->frame[cur->curframe];
8470 
8471 	return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8472 }
8473 
8474 static void init_explored_state(struct bpf_verifier_env *env, int idx)
8475 {
8476 	env->insn_aux_data[idx].prune_point = true;
8477 }
8478 
8479 /* t, w, e - match pseudo-code above:
8480  * t - index of current instruction
8481  * w - next instruction
8482  * e - edge
8483  */
8484 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
8485 		     bool loop_ok)
8486 {
8487 	int *insn_stack = env->cfg.insn_stack;
8488 	int *insn_state = env->cfg.insn_state;
8489 
8490 	if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
8491 		return 0;
8492 
8493 	if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
8494 		return 0;
8495 
8496 	if (w < 0 || w >= env->prog->len) {
8497 		verbose_linfo(env, t, "%d: ", t);
8498 		verbose(env, "jump out of range from insn %d to %d\n", t, w);
8499 		return -EINVAL;
8500 	}
8501 
8502 	if (e == BRANCH)
8503 		/* mark branch target for state pruning */
8504 		init_explored_state(env, w);
8505 
8506 	if (insn_state[w] == 0) {
8507 		/* tree-edge */
8508 		insn_state[t] = DISCOVERED | e;
8509 		insn_state[w] = DISCOVERED;
8510 		if (env->cfg.cur_stack >= env->prog->len)
8511 			return -E2BIG;
8512 		insn_stack[env->cfg.cur_stack++] = w;
8513 		return 1;
8514 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8515 		if (loop_ok && env->bpf_capable)
8516 			return 0;
8517 		verbose_linfo(env, t, "%d: ", t);
8518 		verbose_linfo(env, w, "%d: ", w);
8519 		verbose(env, "back-edge from insn %d to %d\n", t, w);
8520 		return -EINVAL;
8521 	} else if (insn_state[w] == EXPLORED) {
8522 		/* forward- or cross-edge */
8523 		insn_state[t] = DISCOVERED | e;
8524 	} else {
8525 		verbose(env, "insn state internal bug\n");
8526 		return -EFAULT;
8527 	}
8528 	return 0;
8529 }
8530 
8531 /* non-recursive depth-first-search to detect loops in BPF program
8532  * loop == back-edge in directed graph
8533  */
8534 static int check_cfg(struct bpf_verifier_env *env)
8535 {
8536 	struct bpf_insn *insns = env->prog->insnsi;
8537 	int insn_cnt = env->prog->len;
8538 	int *insn_stack, *insn_state;
8539 	int ret = 0;
8540 	int i, t;
8541 
8542 	insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8543 	if (!insn_state)
8544 		return -ENOMEM;
8545 
8546 	insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8547 	if (!insn_stack) {
8548 		kvfree(insn_state);
8549 		return -ENOMEM;
8550 	}
8551 
8552 	insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8553 	insn_stack[0] = 0; /* 0 is the first instruction */
8554 	env->cfg.cur_stack = 1;
8555 
8556 peek_stack:
8557 	if (env->cfg.cur_stack == 0)
8558 		goto check_state;
8559 	t = insn_stack[env->cfg.cur_stack - 1];
8560 
8561 	if (BPF_CLASS(insns[t].code) == BPF_JMP ||
8562 	    BPF_CLASS(insns[t].code) == BPF_JMP32) {
8563 		u8 opcode = BPF_OP(insns[t].code);
8564 
8565 		if (opcode == BPF_EXIT) {
8566 			goto mark_explored;
8567 		} else if (opcode == BPF_CALL) {
8568 			ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8569 			if (ret == 1)
8570 				goto peek_stack;
8571 			else if (ret < 0)
8572 				goto err_free;
8573 			if (t + 1 < insn_cnt)
8574 				init_explored_state(env, t + 1);
8575 			if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8576 				init_explored_state(env, t);
8577 				ret = push_insn(t, t + insns[t].imm + 1, BRANCH,
8578 						env, false);
8579 				if (ret == 1)
8580 					goto peek_stack;
8581 				else if (ret < 0)
8582 					goto err_free;
8583 			}
8584 		} else if (opcode == BPF_JA) {
8585 			if (BPF_SRC(insns[t].code) != BPF_K) {
8586 				ret = -EINVAL;
8587 				goto err_free;
8588 			}
8589 			/* unconditional jump with single edge */
8590 			ret = push_insn(t, t + insns[t].off + 1,
8591 					FALLTHROUGH, env, true);
8592 			if (ret == 1)
8593 				goto peek_stack;
8594 			else if (ret < 0)
8595 				goto err_free;
8596 			/* unconditional jmp is not a good pruning point,
8597 			 * but it's marked, since backtracking needs
8598 			 * to record jmp history in is_state_visited().
8599 			 */
8600 			init_explored_state(env, t + insns[t].off + 1);
8601 			/* tell verifier to check for equivalent states
8602 			 * after every call and jump
8603 			 */
8604 			if (t + 1 < insn_cnt)
8605 				init_explored_state(env, t + 1);
8606 		} else {
8607 			/* conditional jump with two edges */
8608 			init_explored_state(env, t);
8609 			ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8610 			if (ret == 1)
8611 				goto peek_stack;
8612 			else if (ret < 0)
8613 				goto err_free;
8614 
8615 			ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8616 			if (ret == 1)
8617 				goto peek_stack;
8618 			else if (ret < 0)
8619 				goto err_free;
8620 		}
8621 	} else {
8622 		/* all other non-branch instructions with single
8623 		 * fall-through edge
8624 		 */
8625 		ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8626 		if (ret == 1)
8627 			goto peek_stack;
8628 		else if (ret < 0)
8629 			goto err_free;
8630 	}
8631 
8632 mark_explored:
8633 	insn_state[t] = EXPLORED;
8634 	if (env->cfg.cur_stack-- <= 0) {
8635 		verbose(env, "pop stack internal bug\n");
8636 		ret = -EFAULT;
8637 		goto err_free;
8638 	}
8639 	goto peek_stack;
8640 
8641 check_state:
8642 	for (i = 0; i < insn_cnt; i++) {
8643 		if (insn_state[i] != EXPLORED) {
8644 			verbose(env, "unreachable insn %d\n", i);
8645 			ret = -EINVAL;
8646 			goto err_free;
8647 		}
8648 	}
8649 	ret = 0; /* cfg looks good */
8650 
8651 err_free:
8652 	kvfree(insn_state);
8653 	kvfree(insn_stack);
8654 	env->cfg.insn_state = env->cfg.insn_stack = NULL;
8655 	return ret;
8656 }
8657 
8658 static int check_abnormal_return(struct bpf_verifier_env *env)
8659 {
8660 	int i;
8661 
8662 	for (i = 1; i < env->subprog_cnt; i++) {
8663 		if (env->subprog_info[i].has_ld_abs) {
8664 			verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8665 			return -EINVAL;
8666 		}
8667 		if (env->subprog_info[i].has_tail_call) {
8668 			verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8669 			return -EINVAL;
8670 		}
8671 	}
8672 	return 0;
8673 }
8674 
8675 /* The minimum supported BTF func info size */
8676 #define MIN_BPF_FUNCINFO_SIZE	8
8677 #define MAX_FUNCINFO_REC_SIZE	252
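/* struct bpf_func_info started out as two u32s (insn_off, type_id), which is
 * where the 8-byte minimum comes from; larger user records are accepted only
 * if the tail the kernel doesn't know about is all zero.
 */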
8678 
8679 static int check_btf_func(struct bpf_verifier_env *env,
8680 			  const union bpf_attr *attr,
8681 			  union bpf_attr __user *uattr)
8682 {
8683 	const struct btf_type *type, *func_proto, *ret_type;
8684 	u32 i, nfuncs, urec_size, min_size;
8685 	u32 krec_size = sizeof(struct bpf_func_info);
8686 	struct bpf_func_info *krecord;
8687 	struct bpf_func_info_aux *info_aux = NULL;
8688 	struct bpf_prog *prog;
8689 	const struct btf *btf;
8690 	void __user *urecord;
8691 	u32 prev_offset = 0;
8692 	bool scalar_return;
8693 	int ret = -ENOMEM;
8694 
8695 	nfuncs = attr->func_info_cnt;
8696 	if (!nfuncs) {
8697 		if (check_abnormal_return(env))
8698 			return -EINVAL;
8699 		return 0;
8700 	}
8701 
8702 	if (nfuncs != env->subprog_cnt) {
8703 		verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8704 		return -EINVAL;
8705 	}
8706 
8707 	urec_size = attr->func_info_rec_size;
8708 	if (urec_size < MIN_BPF_FUNCINFO_SIZE ||
8709 	    urec_size > MAX_FUNCINFO_REC_SIZE ||
8710 	    urec_size % sizeof(u32)) {
8711 		verbose(env, "invalid func info rec size %u\n", urec_size);
8712 		return -EINVAL;
8713 	}
8714 
8715 	prog = env->prog;
8716 	btf = prog->aux->btf;
8717 
8718 	urecord = u64_to_user_ptr(attr->func_info);
8719 	min_size = min_t(u32, krec_size, urec_size);
8720 
8721 	krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8722 	if (!krecord)
8723 		return -ENOMEM;
8724 	info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8725 	if (!info_aux)
8726 		goto err_free;
8727 
8728 	for (i = 0; i < nfuncs; i++) {
8729 		ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8730 		if (ret) {
8731 			if (ret == -E2BIG) {
8732 				verbose(env, "nonzero trailing record in func info");
8733 				/* set the size kernel expects so loader can zero
8734 				 * out the rest of the record.
8735 				 */
8736 				if (put_user(min_size, &uattr->func_info_rec_size))
8737 					ret = -EFAULT;
8738 			}
8739 			goto err_free;
8740 		}
8741 
8742 		if (copy_from_user(&krecord[i], urecord, min_size)) {
8743 			ret = -EFAULT;
8744 			goto err_free;
8745 		}
8746 
8747 		/* check insn_off */
8748 		ret = -EINVAL;
8749 		if (i == 0) {
8750 			if (krecord[i].insn_off) {
8751 				verbose(env,
8752 					"nonzero insn_off %u for the first func info record",
8753 					krecord[i].insn_off);
8754 				goto err_free;
8755 			}
8756 		} else if (krecord[i].insn_off <= prev_offset) {
8757 			verbose(env,
8758 				"same or smaller insn offset (%u) than previous func info record (%u)",
8759 				krecord[i].insn_off, prev_offset);
8760 			goto err_free;
8761 		}
8762 
8763 		if (env->subprog_info[i].start != krecord[i].insn_off) {
8764 			verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8765 			goto err_free;
8766 		}
8767 
8768 		/* check type_id */
8769 		type = btf_type_by_id(btf, krecord[i].type_id);
8770 		if (!type || !btf_type_is_func(type)) {
8771 			verbose(env, "invalid type id %d in func info",
8772 				krecord[i].type_id);
8773 			goto err_free;
8774 		}
8775 		info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8776 
8777 		func_proto = btf_type_by_id(btf, type->type);
8778 		if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
8779 			/* btf_func_check() already verified it during BTF load */
8780 			goto err_free;
8781 		ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8782 		scalar_return =
8783 			btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8784 		if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8785 			verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8786 			goto err_free;
8787 		}
8788 		if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8789 			verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8790 			goto err_free;
8791 		}
8792 
8793 		prev_offset = krecord[i].insn_off;
8794 		urecord += urec_size;
8795 	}
8796 
8797 	prog->aux->func_info = krecord;
8798 	prog->aux->func_info_cnt = nfuncs;
8799 	prog->aux->func_info_aux = info_aux;
8800 	return 0;
8801 
8802 err_free:
8803 	kvfree(krecord);
8804 	kfree(info_aux);
8805 	return ret;
8806 }
8807 
8808 static void adjust_btf_func(struct bpf_verifier_env *env)
8809 {
8810 	struct bpf_prog_aux *aux = env->prog->aux;
8811 	int i;
8812 
8813 	if (!aux->func_info)
8814 		return;
8815 
8816 	for (i = 0; i < env->subprog_cnt; i++)
8817 		aux->func_info[i].insn_off = env->subprog_info[i].start;
8818 }
8819 
8820 #define MIN_BPF_LINEINFO_SIZE	(offsetof(struct bpf_line_info, line_col) + \
8821 		sizeof(((struct bpf_line_info *)(0))->line_col))
8822 #define MAX_LINEINFO_REC_SIZE	MAX_FUNCINFO_REC_SIZE
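/* Same scheme as func info: records smaller than the original layout are
 * rejected, larger ones are accepted only when the unknown tail is zero,
 * as enforced by bpf_check_uarg_tail_zero() in check_btf_line() below.
 */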
8823 
8824 static int check_btf_line(struct bpf_verifier_env *env,
8825 			  const union bpf_attr *attr,
8826 			  union bpf_attr __user *uattr)
8827 {
8828 	u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8829 	struct bpf_subprog_info *sub;
8830 	struct bpf_line_info *linfo;
8831 	struct bpf_prog *prog;
8832 	const struct btf *btf;
8833 	void __user *ulinfo;
8834 	int err;
8835 
8836 	nr_linfo = attr->line_info_cnt;
8837 	if (!nr_linfo)
8838 		return 0;
8839 	if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info))
8840 		return -EINVAL;
8841 
8842 	rec_size = attr->line_info_rec_size;
8843 	if (rec_size < MIN_BPF_LINEINFO_SIZE ||
8844 	    rec_size > MAX_LINEINFO_REC_SIZE ||
8845 	    rec_size & (sizeof(u32) - 1))
8846 		return -EINVAL;
8847 
8848 	/* Need to zero it in case userspace passes in a
8849 	 * smaller bpf_line_info object.
8850 	 */
8851 	linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info),
8852 			 GFP_KERNEL | __GFP_NOWARN);
8853 	if (!linfo)
8854 		return -ENOMEM;
8855 
8856 	prog = env->prog;
8857 	btf = prog->aux->btf;
8858 
8859 	s = 0;
8860 	sub = env->subprog_info;
8861 	ulinfo = u64_to_user_ptr(attr->line_info);
8862 	expected_size = sizeof(struct bpf_line_info);
8863 	ncopy = min_t(u32, expected_size, rec_size);
8864 	for (i = 0; i < nr_linfo; i++) {
8865 		err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8866 		if (err) {
8867 			if (err == -E2BIG) {
8868 				verbose(env, "nonzero trailing record in line_info");
8869 				if (put_user(expected_size,
8870 					     &uattr->line_info_rec_size))
8871 					err = -EFAULT;
8872 			}
8873 			goto err_free;
8874 		}
8875 
8876 		if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8877 			err = -EFAULT;
8878 			goto err_free;
8879 		}
8880 
8881 		/*
8882 		 * Check insn_off to ensure
8883 		 * 1) strictly increasing AND
8884 		 * 2) bounded by prog->len
8885 		 *
8886 		 * The linfo[0].insn_off == 0 check logically falls into
8887 		 * the later "missing bpf_line_info for func..." case
8888 		 * because the first linfo[0].insn_off must be the
8889 		 * first sub also and the first sub must have
8890 		 * subprog_info[0].start == 0.
8891 		 */
8892 		if ((i && linfo[i].insn_off <= prev_offset) ||
8893 		    linfo[i].insn_off >= prog->len) {
8894 			verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n",
8895 				i, linfo[i].insn_off, prev_offset,
8896 				prog->len);
8897 			err = -EINVAL;
8898 			goto err_free;
8899 		}
8900 
8901 		if (!prog->insnsi[linfo[i].insn_off].code) {
8902 			verbose(env,
8903 				"Invalid insn code at line_info[%u].insn_off\n",
8904 				i);
8905 			err = -EINVAL;
8906 			goto err_free;
8907 		}
8908 
8909 		if (!btf_name_by_offset(btf, linfo[i].line_off) ||
8910 		    !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8911 			verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8912 			err = -EINVAL;
8913 			goto err_free;
8914 		}
8915 
8916 		if (s != env->subprog_cnt) {
8917 			if (linfo[i].insn_off == sub[s].start) {
8918 				sub[s].linfo_idx = i;
8919 				s++;
8920 			} else if (sub[s].start < linfo[i].insn_off) {
8921 				verbose(env, "missing bpf_line_info for func#%u\n", s);
8922 				err = -EINVAL;
8923 				goto err_free;
8924 			}
8925 		}
8926 
8927 		prev_offset = linfo[i].insn_off;
8928 		ulinfo += rec_size;
8929 	}
8930 
8931 	if (s != env->subprog_cnt) {
8932 		verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n",
8933 			env->subprog_cnt - s, s);
8934 		err = -EINVAL;
8935 		goto err_free;
8936 	}
8937 
8938 	prog->aux->linfo = linfo;
8939 	prog->aux->nr_linfo = nr_linfo;
8940 
8941 	return 0;
8942 
8943 err_free:
8944 	kvfree(linfo);
8945 	return err;
8946 }
8947 
8948 static int check_btf_info(struct bpf_verifier_env *env,
8949 			  const union bpf_attr *attr,
8950 			  union bpf_attr __user *uattr)
8951 {
8952 	struct btf *btf;
8953 	int err;
8954 
8955 	if (!attr->func_info_cnt && !attr->line_info_cnt) {
8956 		if (check_abnormal_return(env))
8957 			return -EINVAL;
8958 		return 0;
8959 	}
8960 
8961 	btf = btf_get_by_fd(attr->prog_btf_fd);
8962 	if (IS_ERR(btf))
8963 		return PTR_ERR(btf);
8964 	env->prog->aux->btf = btf;
8965 
8966 	err = check_btf_func(env, attr, uattr);
8967 	if (err)
8968 		return err;
8969 
8970 	err = check_btf_line(env, attr, uattr);
8971 	if (err)
8972 		return err;
8973 
8974 	return 0;
8975 }
8976 
8977 /* check %cur's range satisfies %old's */
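/* Illustrative example: an old unsigned range of [2, 8] is satisfied by a
 * current range of [3, 6] (narrower on both ends), but not by [1, 6] or
 * [3, 9].
 */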
8978 static bool range_within(struct bpf_reg_state *old,
8979 			 struct bpf_reg_state *cur)
8980 {
8981 	return old->umin_value <= cur->umin_value &&
8982 	       old->umax_value >= cur->umax_value &&
8983 	       old->smin_value <= cur->smin_value &&
8984 	       old->smax_value >= cur->smax_value &&
8985 	       old->u32_min_value <= cur->u32_min_value &&
8986 	       old->u32_max_value >= cur->u32_max_value &&
8987 	       old->s32_min_value <= cur->s32_min_value &&
8988 	       old->s32_max_value >= cur->s32_max_value;
8989 }
8990 
8991 /* If in the old state two registers had the same id, then they need to have
8992  * the same id in the new state as well.  But that id could be different from
8993  * the old state, so we need to track the mapping from old to new ids.
8994  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8995  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
8996  * regs with a different old id could still have new id 9, we don't care about
8997  * that.
8998  * So we look through our idmap to see if this old id has been seen before.  If
8999  * so, we require the new id to match; otherwise, we add the id pair to the map.
9000  */
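/* Illustrative example: if the old state had r1.id == r2.id == 5 and the
 * current state has r1.id == 9, check_ids() records the pair (5 -> 9) on the
 * first call, and the second call succeeds only if r2.id is also 9.
 */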
9001 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
9002 {
9003 	unsigned int i;
9004 
9005 	for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
9006 		if (!idmap[i].old) {
9007 			/* Reached an empty slot; haven't seen this id before */
9008 			idmap[i].old = old_id;
9009 			idmap[i].cur = cur_id;
9010 			return true;
9011 		}
9012 		if (idmap[i].old == old_id)
9013 			return idmap[i].cur == cur_id;
9014 	}
9015 	/* We ran out of idmap slots, which should be impossible */
9016 	WARN_ON_ONCE(1);
9017 	return false;
9018 }
9019 
9020 static void clean_func_state(struct bpf_verifier_env *env,
9021 			     struct bpf_func_state *st)
9022 {
9023 	enum bpf_reg_liveness live;
9024 	int i, j;
9025 
9026 	for (i = 0; i < BPF_REG_FP; i++) {
9027 		live = st->regs[i].live;
9028 		/* liveness must not touch this register anymore */
9029 		st->regs[i].live |= REG_LIVE_DONE;
9030 		if (!(live & REG_LIVE_READ))
9031 			/* since the register is unused, clear its state
9032 			 * to make further comparison simpler
9033 			 */
9034 			__mark_reg_not_init(env, &st->regs[i]);
9035 	}
9036 
9037 	for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
9038 		live = st->stack[i].spilled_ptr.live;
9039 		/* liveness must not touch this stack slot anymore */
9040 		st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
9041 		if (!(live & REG_LIVE_READ)) {
9042 			__mark_reg_not_init(env, &st->stack[i].spilled_ptr);
9043 			for (j = 0; j < BPF_REG_SIZE; j++)
9044 				st->stack[i].slot_type[j] = STACK_INVALID;
9045 		}
9046 	}
9047 }
9048 
9049 static void clean_verifier_state(struct bpf_verifier_env *env,
9050 				 struct bpf_verifier_state *st)
9051 {
9052 	int i;
9053 
9054 	if (st->frame[0]->regs[0].live & REG_LIVE_DONE)
9055 		/* all regs in this state in all frames were already marked */
9056 		return;
9057 
9058 	for (i = 0; i <= st->curframe; i++)
9059 		clean_func_state(env, st->frame[i]);
9060 }
9061 
9062 /* the parentage chains form a tree.
9063  * the verifier states are added to state lists at given insn and
9064  * pushed into state stack for future exploration.
9065  * when the verifier reaches bpf_exit insn some of the verifier states
9066  * stored in the state lists have their final liveness state already,
9067  * but a lot of states will get revised from liveness point of view when
9068  * the verifier explores other branches.
9069  * Example:
9070  * 1: r0 = 1
9071  * 2: if r1 == 100 goto pc+1
9072  * 3: r0 = 2
9073  * 4: exit
9074  * when the verifier reaches exit insn the register r0 in the state list of
9075  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
9076  * of insn 2 and goes exploring further. At the insn 4 it will walk the
9077  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
9078  *
9079  * Since the verifier pushes the branch states as it sees them while exploring
9080  * the program, the condition of walking the branch instruction for the second
9081  * time means that all states below this branch were already explored and
9082  * their final liveness marks are already propagated.
9083  * Hence when the verifier completes the search of state list in is_state_visited()
9084  * we can call this clean_live_states() function to mark all liveness states
9085  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
9086  * will not be used.
9087  * This function also clears the registers and stack for states that !READ
9088  * to simplify state merging.
9089  *
9090  * An important note here is that walking the same branch instruction in the callee
9091  * doesn't mean that the states are DONE. The verifier has to compare
9092  * the callsites as well.
9093  */
9094 static void clean_live_states(struct bpf_verifier_env *env, int insn,
9095 			      struct bpf_verifier_state *cur)
9096 {
9097 	struct bpf_verifier_state_list *sl;
9098 	int i;
9099 
9100 	sl = *explored_state(env, insn);
9101 	while (sl) {
9102 		if (sl->state.branches)
9103 			goto next;
9104 		if (sl->state.insn_idx != insn ||
9105 		    sl->state.curframe != cur->curframe)
9106 			goto next;
9107 		for (i = 0; i <= cur->curframe; i++)
9108 			if (sl->state.frame[i]->callsite != cur->frame[i]->callsite)
9109 				goto next;
9110 		clean_verifier_state(env, &sl->state);
9111 next:
9112 		sl = sl->next;
9113 	}
9114 }
9115 
9116 /* Returns true if (rold safe implies rcur safe) */
9117 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
9118 		    struct bpf_reg_state *rcur, struct bpf_id_pair *idmap)
9119 {
9120 	bool equal;
9121 
9122 	if (!(rold->live & REG_LIVE_READ))
9123 		/* explored state didn't use this */
9124 		return true;
9125 
9126 	equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
9127 
9128 	if (rold->type == PTR_TO_STACK)
9129 		/* two stack pointers are equal only if they're pointing to
9130 		 * the same stack frame, since fp-8 in foo != fp-8 in bar
9131 		 */
9132 		return equal && rold->frameno == rcur->frameno;
9133 
9134 	if (equal)
9135 		return true;
9136 
9137 	if (rold->type == NOT_INIT)
9138 		/* explored state can't have used this */
9139 		return true;
9140 	if (rcur->type == NOT_INIT)
9141 		return false;
9142 	switch (rold->type) {
9143 	case SCALAR_VALUE:
9144 		if (env->explore_alu_limits)
9145 			return false;
9146 		if (rcur->type == SCALAR_VALUE) {
9147 			if (!rold->precise && !rcur->precise)
9148 				return true;
9149 			/* new val must satisfy old val knowledge */
9150 			return range_within(rold, rcur) &&
9151 			       tnum_in(rold->var_off, rcur->var_off);
9152 		} else {
9153 			/* We're trying to use a pointer in place of a scalar.
9154 			 * Even if the scalar was unbounded, this could lead to
9155 			 * pointer leaks because scalars are allowed to leak
9156 			 * while pointers are not. We could make this safe in
9157 			 * special cases if root is calling us, but it's
9158 			 * probably not worth the hassle.
9159 			 */
9160 			return false;
9161 		}
9162 	case PTR_TO_MAP_VALUE:
9163 		/* If the new min/max/var_off satisfy the old ones and
9164 		 * everything else matches, we are OK.
9165 		 * 'id' is not compared, since it's only used for maps with
9166 		 * bpf_spin_lock inside map element and in such cases if
9167 		 * the rest of the prog is valid for one map element then
9168 		 * it's valid for all map elements regardless of the key
9169 		 * used in bpf_map_lookup()
9170 		 */
9171 		return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
9172 		       range_within(rold, rcur) &&
9173 		       tnum_in(rold->var_off, rcur->var_off);
9174 	case PTR_TO_MAP_VALUE_OR_NULL:
9175 		/* a PTR_TO_MAP_VALUE could be safe to use as a
9176 		 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9177 		 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9178 		 * checked, doing so could have affected others with the same
9179 		 * id, and we can't check for that because we lost the id when
9180 		 * we converted to a PTR_TO_MAP_VALUE.
9181 		 */
9182 		if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL)
9183 			return false;
9184 		if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)))
9185 			return false;
9186 		/* Check our ids match any regs they're supposed to */
9187 		return check_ids(rold->id, rcur->id, idmap);
9188 	case PTR_TO_PACKET_META:
9189 	case PTR_TO_PACKET:
9190 		if (rcur->type != rold->type)
9191 			return false;
9192 		/* We must have at least as much range as the old ptr
9193 		 * did, so that any accesses which were safe before are
9194 		 * still safe.  This is true even if old range < old off,
9195 		 * since someone could have accessed through (ptr - k), or
9196 		 * even done ptr -= k in a register, to get a safe access.
9197 		 */
9198 		if (rold->range > rcur->range)
9199 			return false;
9200 		/* If the offsets don't match, we can't trust our alignment;
9201 		 * nor can we be sure that we won't fall out of range.
9202 		 */
9203 		if (rold->off != rcur->off)
9204 			return false;
9205 		/* id relations must be preserved */
9206 		if (rold->id && !check_ids(rold->id, rcur->id, idmap))
9207 			return false;
9208 		/* new val must satisfy old val knowledge */
9209 		return range_within(rold, rcur) &&
9210 		       tnum_in(rold->var_off, rcur->var_off);
9211 	case PTR_TO_CTX:
9212 	case CONST_PTR_TO_MAP:
9213 	case PTR_TO_PACKET_END:
9214 	case PTR_TO_FLOW_KEYS:
9215 	case PTR_TO_SOCKET:
9216 	case PTR_TO_SOCKET_OR_NULL:
9217 	case PTR_TO_SOCK_COMMON:
9218 	case PTR_TO_SOCK_COMMON_OR_NULL:
9219 	case PTR_TO_TCP_SOCK:
9220 	case PTR_TO_TCP_SOCK_OR_NULL:
9221 	case PTR_TO_XDP_SOCK:
9222 		/* Only valid matches are exact, which memcmp() above
9223 		 * would have accepted
9224 		 */
9225 	default:
9226 		/* Don't know what's going on, just say it's not safe */
9227 		return false;
9228 	}
9229 
9230 	/* Shouldn't get here; if we do, say it's not safe */
9231 	WARN_ON_ONCE(1);
9232 	return false;
9233 }
9234 
9235 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
9236 		      struct bpf_func_state *cur, struct bpf_id_pair *idmap)
9237 {
9238 	int i, spi;
9239 
9240 	/* walk slots of the explored stack and ignore any additional
9241 	 * slots in the current stack, since explored(safe) state
9242 	 * didn't use them
9243 	 */
9244 	for (i = 0; i < old->allocated_stack; i++) {
9245 		spi = i / BPF_REG_SIZE;
9246 
9247 		if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9248 			i += BPF_REG_SIZE - 1;
9249 			/* explored state didn't use this */
9250 			continue;
9251 		}
9252 
9253 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID)
9254 			continue;
9255 
9256 		/* explored stack has more populated slots than current stack
9257 		 * and these slots were used
9258 		 */
9259 		if (i >= cur->allocated_stack)
9260 			return false;
9261 
9262 		/* if old state was safe with misc data in the stack
9263 		 * it will be safe with zero-initialized stack.
9264 		 * The opposite is not true
9265 		 */
9266 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9267 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO)
9268 			continue;
9269 		if (old->stack[spi].slot_type[i % BPF_REG_SIZE] !=
9270 		    cur->stack[spi].slot_type[i % BPF_REG_SIZE])
9271 			/* Ex: old explored (safe) state has STACK_SPILL in
9272 			 * this stack slot, but current has STACK_MISC ->
9273 			 * these verifier states are not equivalent,
9274 			 * return false to continue verification of this path
9275 			 */
9276 			return false;
9277 		if (i % BPF_REG_SIZE)
9278 			continue;
9279 		if (old->stack[spi].slot_type[0] != STACK_SPILL)
9280 			continue;
9281 		if (!regsafe(env, &old->stack[spi].spilled_ptr,
9282 			     &cur->stack[spi].spilled_ptr, idmap))
9283 			/* when explored and current stack slot are both storing
9284 			 * spilled registers, check that stored pointers types
9285 			 * are the same as well.
9286 			 * Ex: explored safe path could have stored
9287 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9288 			 * but current path has stored:
9289 			 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9290 			 * such verifier states are not equivalent.
9291 			 * return false to continue verification of this path
9292 			 */
9293 			return false;
9294 	}
9295 	return true;
9296 }
9297 
9298 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9299 {
9300 	if (old->acquired_refs != cur->acquired_refs)
9301 		return false;
9302 	return !memcmp(old->refs, cur->refs,
9303 		       sizeof(*old->refs) * old->acquired_refs);
9304 }
9305 
9306 /* compare two verifier states
9307  *
9308  * all states stored in state_list are known to be valid, since
9309  * verifier reached 'bpf_exit' instruction through them
9310  *
9311  * this function is called when the verifier explores different branches of
9312  * execution popped from the state stack. If it sees an old state that has
9313  * more strict register state and more strict stack state then this execution
9314  * branch doesn't need to be explored further, since verifier already
9315  * concluded that more strict state leads to valid finish.
9316  *
9317  * Therefore two states are equivalent if register state is more conservative
9318  * and explored stack state is more conservative than the current one.
9319  * Example:
9320  *       explored                   current
9321  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9322  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9323  *
9324  * In other words if current stack state (one being explored) has more
9325  * valid slots than old one that already passed validation, it means
9326  * the verifier can stop exploring and conclude that current state is valid too
9327  *
9328  * Similarly with registers. If explored state has register type as invalid
9329  * whereas register type in current state is meaningful, it means that
9330  * the current state will reach 'bpf_exit' instruction safely
9331  */
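/* A simplified illustration of the register half of this comparison (the
 * values are made up): if the explored state tracked R2 as a SCALAR_VALUE
 * with bounds [0, 10] and the current state has R2 as a SCALAR_VALUE with
 * bounds [2, 5], the current range lies within the explored one and R2 is
 * considered safe; the reverse (a wider current range) is not, and forces
 * further exploration of this path.
 */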
9332 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old,
9333 			      struct bpf_func_state *cur)
9334 {
9335 	int i;
9336 
9337 	memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9338 	for (i = 0; i < MAX_BPF_REG; i++)
9339 		if (!regsafe(env, &old->regs[i], &cur->regs[i],
9340 			     env->idmap_scratch))
9341 			return false;
9342 
9343 	if (!stacksafe(env, old, cur, env->idmap_scratch))
9344 		return false;
9345 
9346 	if (!refsafe(old, cur))
9347 		return false;
9348 
9349 	return true;
9350 }
9351 
9352 static bool states_equal(struct bpf_verifier_env *env,
9353 			 struct bpf_verifier_state *old,
9354 			 struct bpf_verifier_state *cur)
9355 {
9356 	int i;
9357 
9358 	if (old->curframe != cur->curframe)
9359 		return false;
9360 
9361 	/* Verification state from speculative execution simulation
9362 	 * must never prune a non-speculative execution one.
9363 	 */
9364 	if (old->speculative && !cur->speculative)
9365 		return false;
9366 
9367 	if (old->active_spin_lock != cur->active_spin_lock)
9368 		return false;
9369 
9370 	/* for states to be equal callsites have to be the same
9371 	 * and all frame states need to be equivalent
9372 	 */
9373 	for (i = 0; i <= old->curframe; i++) {
9374 		if (old->frame[i]->callsite != cur->frame[i]->callsite)
9375 			return false;
9376 		if (!func_states_equal(env, old->frame[i], cur->frame[i]))
9377 			return false;
9378 	}
9379 	return true;
9380 }
9381 
9382 /* Return 0 if no propagation happened. Return negative error code if error
9383  * happened. Otherwise, return the propagated bit.
9384  */
9385 static int propagate_liveness_reg(struct bpf_verifier_env *env,
9386 				  struct bpf_reg_state *reg,
9387 				  struct bpf_reg_state *parent_reg)
9388 {
9389 	u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9390 	u8 flag = reg->live & REG_LIVE_READ;
9391 	int err;
9392 
9393 	/* By the time we get here, the read flags of PARENT_REG and REG can be
9394 	 * any of REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is no
9395 	 * need for propagation if PARENT_REG already has REG_LIVE_READ64.
9396 	 */
9397 	if (parent_flag == REG_LIVE_READ64 ||
9398 	    /* Or if there is no read flag from REG. */
9399 	    !flag ||
9400 	    /* Or if the read flag from REG is the same as PARENT_REG. */
9401 	    parent_flag == flag)
9402 		return 0;
9403 
9404 	err = mark_reg_read(env, reg, parent_reg, flag);
9405 	if (err)
9406 		return err;
9407 
9408 	return flag;
9409 }
9410 
9411 /* A write screens off any subsequent reads; but write marks come from the
9412  * straight-line code between a state and its parent.  When we arrive at an
9413  * equivalent state (jump target or such) we didn't arrive by the straight-line
9414  * code, so read marks in the state must propagate to the parent regardless
9415  * of the state's write marks. That's what 'parent == state->parent' comparison
9416  * in mark_reg_read() is for.
9417  */
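/* A rough, made-up sketch of the situation described above:
 *
 *     r6 = 1                  ; straight-line code puts a write mark on r6
 *     if r7 > 5 goto L
 *     ...
 * L:                          ; an equivalent explored state exists here
 *     r0 = r6                 ; its continuation read r6
 *     exit
 *
 * When the fall-through path reaches L and is pruned against the explored
 * state, the read of r6 recorded in that state still has to reach the
 * pruned path's parent, even though the pruned path wrote r6 itself.
 */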
9418 static int propagate_liveness(struct bpf_verifier_env *env,
9419 			      const struct bpf_verifier_state *vstate,
9420 			      struct bpf_verifier_state *vparent)
9421 {
9422 	struct bpf_reg_state *state_reg, *parent_reg;
9423 	struct bpf_func_state *state, *parent;
9424 	int i, frame, err = 0;
9425 
9426 	if (vparent->curframe != vstate->curframe) {
9427 		WARN(1, "propagate_live: parent frame %d current frame %d\n",
9428 		     vparent->curframe, vstate->curframe);
9429 		return -EFAULT;
9430 	}
9431 	/* Propagate read liveness of registers... */
9432 	BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9433 	for (frame = 0; frame <= vstate->curframe; frame++) {
9434 		parent = vparent->frame[frame];
9435 		state = vstate->frame[frame];
9436 		parent_reg = parent->regs;
9437 		state_reg = state->regs;
9438 		/* We don't need to worry about FP liveness, it's read-only */
9439 		for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9440 			err = propagate_liveness_reg(env, &state_reg[i],
9441 						     &parent_reg[i]);
9442 			if (err < 0)
9443 				return err;
9444 			if (err == REG_LIVE_READ64)
9445 				mark_insn_zext(env, &parent_reg[i]);
9446 		}
9447 
9448 		/* Propagate stack slots. */
9449 		for (i = 0; i < state->allocated_stack / BPF_REG_SIZE &&
9450 			    i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9451 			parent_reg = &parent->stack[i].spilled_ptr;
9452 			state_reg = &state->stack[i].spilled_ptr;
9453 			err = propagate_liveness_reg(env, state_reg,
9454 						     parent_reg);
9455 			if (err < 0)
9456 				return err;
9457 		}
9458 	}
9459 	return 0;
9460 }
9461 
9462 /* find precise scalars in the previous equivalent state and
9463  * propagate them into the current state
9464  */
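/* A made-up example: the old (explored) state may have had to mark r3
 * precise because its exact value was later used, e.g. as a variable stack
 * offset or a helper's size argument. If the current state is pruned
 * against that old state, r3 in the current state must become precise as
 * well, so backtracking keeps tracking its exact value.
 */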
9465 static int propagate_precision(struct bpf_verifier_env *env,
9466 			       const struct bpf_verifier_state *old)
9467 {
9468 	struct bpf_reg_state *state_reg;
9469 	struct bpf_func_state *state;
9470 	int i, err = 0;
9471 
9472 	state = old->frame[old->curframe];
9473 	state_reg = state->regs;
9474 	for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9475 		if (state_reg->type != SCALAR_VALUE ||
9476 		    !state_reg->precise)
9477 			continue;
9478 		if (env->log.level & BPF_LOG_LEVEL2)
9479 			verbose(env, "propagating r%d\n", i);
9480 		err = mark_chain_precision(env, i);
9481 		if (err < 0)
9482 			return err;
9483 	}
9484 
9485 	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9486 		if (state->stack[i].slot_type[0] != STACK_SPILL)
9487 			continue;
9488 		state_reg = &state->stack[i].spilled_ptr;
9489 		if (state_reg->type != SCALAR_VALUE ||
9490 		    !state_reg->precise)
9491 			continue;
9492 		if (env->log.level & BPF_LOG_LEVEL2)
9493 			verbose(env, "propagating fp%d\n",
9494 				(-i - 1) * BPF_REG_SIZE);
9495 		err = mark_chain_precision_stack(env, i);
9496 		if (err < 0)
9497 			return err;
9498 	}
9499 	return 0;
9500 }
9501 
9502 static bool states_maybe_looping(struct bpf_verifier_state *old,
9503 				 struct bpf_verifier_state *cur)
9504 {
9505 	struct bpf_func_state *fold, *fcur;
9506 	int i, fr = cur->curframe;
9507 
9508 	if (old->curframe != fr)
9509 		return false;
9510 
9511 	fold = old->frame[fr];
9512 	fcur = cur->frame[fr];
9513 	for (i = 0; i < MAX_BPF_REG; i++)
9514 		if (memcmp(&fold->regs[i], &fcur->regs[i],
9515 			   offsetof(struct bpf_reg_state, parent)))
9516 			return false;
9517 	return true;
9518 }
9519 
9520 
9521 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9522 {
9523 	struct bpf_verifier_state_list *new_sl;
9524 	struct bpf_verifier_state_list *sl, **pprev;
9525 	struct bpf_verifier_state *cur = env->cur_state, *new;
9526 	int i, j, err, states_cnt = 0;
9527 	bool add_new_state = env->test_state_freq ? true : false;
9528 
9529 	cur->last_insn_idx = env->prev_insn_idx;
9530 	if (!env->insn_aux_data[insn_idx].prune_point)
9531 		/* this 'insn_idx' instruction wasn't marked, so we will not
9532 		 * be doing state search here
9533 		 */
9534 		return 0;
9535 
9536 	/* bpf progs typically have a pruning point every 4 instructions
9537 	 * http://vger.kernel.org/bpfconf2019.html#session-1
9538 	 * Do not add a new state for future pruning if the verifier hasn't seen
9539 	 * at least 2 jumps and at least 8 instructions.
9540 	 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
9541 	 * In tests that amounts to up to a 50% reduction in total verifier
9542 	 * memory consumption and a 20% verifier time speedup.
9543 	 */
9544 	if (env->jmps_processed - env->prev_jmps_processed >= 2 &&
9545 	    env->insn_processed - env->prev_insn_processed >= 8)
9546 		add_new_state = true;
9547 
9548 	pprev = explored_state(env, insn_idx);
9549 	sl = *pprev;
9550 
9551 	clean_live_states(env, insn_idx, cur);
9552 
9553 	while (sl) {
9554 		states_cnt++;
9555 		if (sl->state.insn_idx != insn_idx)
9556 			goto next;
9557 		if (sl->state.branches) {
9558 			if (states_maybe_looping(&sl->state, cur) &&
9559 			    states_equal(env, &sl->state, cur)) {
9560 				verbose_linfo(env, insn_idx, "; ");
9561 				verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9562 				return -EINVAL;
9563 			}
9564 			/* if the verifier is processing a loop, avoid adding new state
9565 			 * too often, since different loop iterations have distinct
9566 			 * states and may not help future pruning.
9567 			 * This threshold shouldn't be too low to make sure that
9568 			 * a loop with large bound will be rejected quickly.
9569 			 * The most abusive loop will be:
9570 			 * r1 += 1
9571 			 * if r1 < 1000000 goto pc-2
9572 			 * 1M insn_processed limit / 100 == 10k peak states.
9573 			 * This threshold shouldn't be too high either, since states
9574 			 * at the end of the loop are likely to be useful in pruning.
9575 			 */
9576 			if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9577 			    env->insn_processed - env->prev_insn_processed < 100)
9578 				add_new_state = false;
9579 			goto miss;
9580 		}
9581 		if (states_equal(env, &sl->state, cur)) {
9582 			sl->hit_cnt++;
9583 			/* reached equivalent register/stack state,
9584 			 * prune the search.
9585 			 * Registers read by the continuation are read by us.
9586 			 * If we have any write marks in env->cur_state, they
9587 			 * will prevent corresponding reads in the continuation
9588 			 * from reaching our parent (an explored_state).  Our
9589 			 * own state will get the read marks recorded, but
9590 			 * they'll be immediately forgotten as we're pruning
9591 			 * this state and will pop a new one.
9592 			 */
9593 			err = propagate_liveness(env, &sl->state, cur);
9594 
9595 			/* if previous state reached the exit with precision and
9596 			 * current state is equivalent to it (except precision marks)
9597 			 * the precision needs to be propagated back in
9598 			 * the current state.
9599 			 */
9600 			err = err ? : push_jmp_history(env, cur);
9601 			err = err ? : propagate_precision(env, &sl->state);
9602 			if (err)
9603 				return err;
9604 			return 1;
9605 		}
9606 miss:
9607 		/* when a new state is not going to be added, do not increase miss count.
9608 		 * Otherwise several loop iterations will remove the state
9609 		 * recorded earlier. The goal of these heuristics is to have
9610 		 * states from some iterations of the loop (some in the beginning
9611 		 * and some at the end) to help pruning.
9612 		 */
9613 		if (add_new_state)
9614 			sl->miss_cnt++;
9615 		/* heuristic to determine whether this state is beneficial
9616 		 * to keep checking from state equivalence point of view.
9617 		 * Higher numbers increase max_states_per_insn and verification time,
9618 		 * but do not meaningfully decrease insn_processed.
9619 		 */
9620 		if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9621 			/* the state is unlikely to be useful. Remove it to
9622 			 * speed up verification
9623 			 */
9624 			*pprev = sl->next;
9625 			if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9626 				u32 br = sl->state.branches;
9627 
9628 				WARN_ONCE(br,
9629 					  "BUG live_done but branches_to_explore %d\n",
9630 					  br);
9631 				free_verifier_state(&sl->state, false);
9632 				kfree(sl);
9633 				env->peak_states--;
9634 			} else {
9635 				/* cannot free this state, since parentage chain may
9636 				 * walk it later. Add it for free_list instead to
9637 				 * be freed at the end of verification
9638 				 */
9639 				sl->next = env->free_list;
9640 				env->free_list = sl;
9641 			}
9642 			sl = *pprev;
9643 			continue;
9644 		}
9645 next:
9646 		pprev = &sl->next;
9647 		sl = *pprev;
9648 	}
9649 
9650 	if (env->max_states_per_insn < states_cnt)
9651 		env->max_states_per_insn = states_cnt;
9652 
9653 	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
9654 		return push_jmp_history(env, cur);
9655 
9656 	if (!add_new_state)
9657 		return push_jmp_history(env, cur);
9658 
9659 	/* There were no equivalent states, remember the current one.
9660 	 * Technically the current state is not proven to be safe yet,
9661 	 * but it will either reach the outermost bpf_exit (which means it's safe)
9662 	 * or it will be rejected. When there are no loops the verifier won't be
9663 	 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9664 	 * again on the way to bpf_exit.
9665 	 * When looping the sl->state.branches will be > 0 and this state
9666 	 * will not be considered for equivalence until branches == 0.
9667 	 */
9668 	new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9669 	if (!new_sl)
9670 		return -ENOMEM;
9671 	env->total_states++;
9672 	env->peak_states++;
9673 	env->prev_jmps_processed = env->jmps_processed;
9674 	env->prev_insn_processed = env->insn_processed;
9675 
9676 	/* add new state to the head of linked list */
9677 	new = &new_sl->state;
9678 	err = copy_verifier_state(new, cur);
9679 	if (err) {
9680 		free_verifier_state(new, false);
9681 		kfree(new_sl);
9682 		return err;
9683 	}
9684 	new->insn_idx = insn_idx;
9685 	WARN_ONCE(new->branches != 1,
9686 		  "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9687 
9688 	cur->parent = new;
9689 	cur->first_insn_idx = insn_idx;
9690 	clear_jmp_history(cur);
9691 	new_sl->next = *explored_state(env, insn_idx);
9692 	*explored_state(env, insn_idx) = new_sl;
9693 	/* connect new state to parentage chain. Current frame needs all
9694 	 * registers connected. Only r6 - r9 of the callers are alive (pushed
9695 	 * to the stack implicitly by JITs) so in callers' frames connect just
9696 	 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9697 	 * the state of the call instruction (with WRITTEN set), and r0 comes
9698 	 * from callee with its full parentage chain, anyway.
9699 	 */
9700 	/* clear write marks in current state: the writes we did are not writes
9701 	 * our child did, so they don't screen off its reads from us.
9702 	 * (There are no read marks in current state, because reads always mark
9703 	 * their parent and current state never has children yet.  Only
9704 	 * explored_states can get read marks.)
9705 	 */
9706 	for (j = 0; j <= cur->curframe; j++) {
9707 		for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++)
9708 			cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9709 		for (i = 0; i < BPF_REG_FP; i++)
9710 			cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9711 	}
9712 
9713 	/* all stack frames are accessible from callee, clear them all */
9714 	for (j = 0; j <= cur->curframe; j++) {
9715 		struct bpf_func_state *frame = cur->frame[j];
9716 		struct bpf_func_state *newframe = new->frame[j];
9717 
9718 		for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9719 			frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9720 			frame->stack[i].spilled_ptr.parent =
9721 						&newframe->stack[i].spilled_ptr;
9722 		}
9723 	}
9724 	return 0;
9725 }
9726 
9727 /* Return true if it's OK to have the same insn return a different type. */
9728 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9729 {
9730 	switch (type) {
9731 	case PTR_TO_CTX:
9732 	case PTR_TO_SOCKET:
9733 	case PTR_TO_SOCKET_OR_NULL:
9734 	case PTR_TO_SOCK_COMMON:
9735 	case PTR_TO_SOCK_COMMON_OR_NULL:
9736 	case PTR_TO_TCP_SOCK:
9737 	case PTR_TO_TCP_SOCK_OR_NULL:
9738 	case PTR_TO_XDP_SOCK:
9739 	case PTR_TO_BTF_ID:
9740 	case PTR_TO_BTF_ID_OR_NULL:
9741 		return false;
9742 	default:
9743 		return true;
9744 	}
9745 }
9746 
9747 /* If an instruction was previously used with particular pointer types, then we
9748  * need to be careful to avoid cases such as the below, where it may be ok
9749  * for one branch to access the pointer, but not ok for the other branch:
9750  *
9751  * R1 = sock_ptr
9752  * goto X;
9753  * ...
9754  * R1 = some_other_valid_ptr;
9755  * goto X;
9756  * ...
9757  * R2 = *(u32 *)(R1 + 0);
9758  */
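/* One reason this matters (simplified): PTR_TO_CTX accesses are rewritten
 * per instruction by convert_ctx_accesses() later on, so a single load insn
 * cannot be correct for both a ctx pointer and, say, a stack pointer:
 *
 *     r1 = ctx_ptr            ; one branch
 *     ...
 *     r1 = stack_ptr          ; another branch
 *     ...
 *     r2 = *(u32 *)(r1 + 0)   ; rejected: same insn, different pointer types
 */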
9759 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9760 {
9761 	return src != prev && (!reg_type_mismatch_ok(src) ||
9762 			       !reg_type_mismatch_ok(prev));
9763 }
9764 
9765 static int do_check(struct bpf_verifier_env *env)
9766 {
9767 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9768 	struct bpf_verifier_state *state = env->cur_state;
9769 	struct bpf_insn *insns = env->prog->insnsi;
9770 	struct bpf_reg_state *regs;
9771 	int insn_cnt = env->prog->len;
9772 	bool do_print_state = false;
9773 	int prev_insn_idx = -1;
9774 
9775 	for (;;) {
9776 		struct bpf_insn *insn;
9777 		u8 class;
9778 		int err;
9779 
9780 		env->prev_insn_idx = prev_insn_idx;
9781 		if (env->insn_idx >= insn_cnt) {
9782 			verbose(env, "invalid insn idx %d insn_cnt %d\n",
9783 				env->insn_idx, insn_cnt);
9784 			return -EFAULT;
9785 		}
9786 
9787 		insn = &insns[env->insn_idx];
9788 		class = BPF_CLASS(insn->code);
9789 
9790 		if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9791 			verbose(env,
9792 				"BPF program is too large. Processed %d insn\n",
9793 				env->insn_processed);
9794 			return -E2BIG;
9795 		}
9796 
9797 		err = is_state_visited(env, env->insn_idx);
9798 		if (err < 0)
9799 			return err;
9800 		if (err == 1) {
9801 			/* found equivalent state, can prune the search */
9802 			if (env->log.level & BPF_LOG_LEVEL) {
9803 				if (do_print_state)
9804 					verbose(env, "\nfrom %d to %d%s: safe\n",
9805 						env->prev_insn_idx, env->insn_idx,
9806 						env->cur_state->speculative ?
9807 						" (speculative execution)" : "");
9808 				else
9809 					verbose(env, "%d: safe\n", env->insn_idx);
9810 			}
9811 			goto process_bpf_exit;
9812 		}
9813 
9814 		if (signal_pending(current))
9815 			return -EAGAIN;
9816 
9817 		if (need_resched())
9818 			cond_resched();
9819 
9820 		if (env->log.level & BPF_LOG_LEVEL2 ||
9821 		    (env->log.level & BPF_LOG_LEVEL && do_print_state)) {
9822 			if (env->log.level & BPF_LOG_LEVEL2)
9823 				verbose(env, "%d:", env->insn_idx);
9824 			else
9825 				verbose(env, "\nfrom %d to %d%s:",
9826 					env->prev_insn_idx, env->insn_idx,
9827 					env->cur_state->speculative ?
9828 					" (speculative execution)" : "");
9829 			print_verifier_state(env, state->frame[state->curframe]);
9830 			do_print_state = false;
9831 		}
9832 
9833 		if (env->log.level & BPF_LOG_LEVEL) {
9834 			const struct bpf_insn_cbs cbs = {
9835 				.cb_print	= verbose,
9836 				.private_data	= env,
9837 			};
9838 
9839 			verbose_linfo(env, env->insn_idx, "; ");
9840 			verbose(env, "%d: ", env->insn_idx);
9841 			print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9842 		}
9843 
9844 		if (bpf_prog_is_dev_bound(env->prog->aux)) {
9845 			err = bpf_prog_offload_verify_insn(env, env->insn_idx,
9846 							   env->prev_insn_idx);
9847 			if (err)
9848 				return err;
9849 		}
9850 
9851 		regs = cur_regs(env);
9852 		sanitize_mark_insn_seen(env);
9853 		prev_insn_idx = env->insn_idx;
9854 
9855 		if (class == BPF_ALU || class == BPF_ALU64) {
9856 			err = check_alu_op(env, insn);
9857 			if (err)
9858 				return err;
9859 
9860 		} else if (class == BPF_LDX) {
9861 			enum bpf_reg_type *prev_src_type, src_reg_type;
9862 
9863 			/* check for reserved fields is already done */
9864 
9865 			/* check src operand */
9866 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
9867 			if (err)
9868 				return err;
9869 
9870 			err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9871 			if (err)
9872 				return err;
9873 
9874 			src_reg_type = regs[insn->src_reg].type;
9875 
9876 			/* check that memory (src_reg + off) is readable,
9877 			 * the state of dst_reg will be updated by this func
9878 			 */
9879 			err = check_mem_access(env, env->insn_idx, insn->src_reg,
9880 					       insn->off, BPF_SIZE(insn->code),
9881 					       BPF_READ, insn->dst_reg, false);
9882 			if (err)
9883 				return err;
9884 
9885 			prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9886 
9887 			if (*prev_src_type == NOT_INIT) {
9888 				/* saw a valid insn
9889 				 * dst_reg = *(u32 *)(src_reg + off)
9890 				 * save type to validate intersecting paths
9891 				 */
9892 				*prev_src_type = src_reg_type;
9893 
9894 			} else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9895 				/* Abuser program is trying to use the same insn
9896 				 * dst_reg = *(u32 *)(src_reg + off)
9897 				 * with different pointer types:
9898 				 * src_reg == ctx in one branch and
9899 				 * src_reg == stack|map in some other branch.
9900 				 * Reject it.
9901 				 */
9902 				verbose(env, "same insn cannot be used with different pointers\n");
9903 				return -EINVAL;
9904 			}
9905 
9906 		} else if (class == BPF_STX) {
9907 			enum bpf_reg_type *prev_dst_type, dst_reg_type;
9908 
9909 			if (BPF_MODE(insn->code) == BPF_XADD) {
9910 				err = check_xadd(env, env->insn_idx, insn);
9911 				if (err)
9912 					return err;
9913 				env->insn_idx++;
9914 				continue;
9915 			}
9916 
9917 			/* check src1 operand */
9918 			err = check_reg_arg(env, insn->src_reg, SRC_OP);
9919 			if (err)
9920 				return err;
9921 			/* check src2 operand */
9922 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9923 			if (err)
9924 				return err;
9925 
9926 			dst_reg_type = regs[insn->dst_reg].type;
9927 
9928 			/* check that memory (dst_reg + off) is writeable */
9929 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9930 					       insn->off, BPF_SIZE(insn->code),
9931 					       BPF_WRITE, insn->src_reg, false);
9932 			if (err)
9933 				return err;
9934 
9935 			prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9936 
9937 			if (*prev_dst_type == NOT_INIT) {
9938 				*prev_dst_type = dst_reg_type;
9939 			} else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9940 				verbose(env, "same insn cannot be used with different pointers\n");
9941 				return -EINVAL;
9942 			}
9943 
9944 		} else if (class == BPF_ST) {
9945 			if (BPF_MODE(insn->code) != BPF_MEM ||
9946 			    insn->src_reg != BPF_REG_0) {
9947 				verbose(env, "BPF_ST uses reserved fields\n");
9948 				return -EINVAL;
9949 			}
9950 			/* check src operand */
9951 			err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9952 			if (err)
9953 				return err;
9954 
9955 			if (is_ctx_reg(env, insn->dst_reg)) {
9956 				verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
9957 					insn->dst_reg,
9958 					reg_type_str[reg_state(env, insn->dst_reg)->type]);
9959 				return -EACCES;
9960 			}
9961 
9962 			/* check that memory (dst_reg + off) is writeable */
9963 			err = check_mem_access(env, env->insn_idx, insn->dst_reg,
9964 					       insn->off, BPF_SIZE(insn->code),
9965 					       BPF_WRITE, -1, false);
9966 			if (err)
9967 				return err;
9968 
9969 		} else if (class == BPF_JMP || class == BPF_JMP32) {
9970 			u8 opcode = BPF_OP(insn->code);
9971 
9972 			env->jmps_processed++;
9973 			if (opcode == BPF_CALL) {
9974 				if (BPF_SRC(insn->code) != BPF_K ||
9975 				    insn->off != 0 ||
9976 				    (insn->src_reg != BPF_REG_0 &&
9977 				     insn->src_reg != BPF_PSEUDO_CALL) ||
9978 				    insn->dst_reg != BPF_REG_0 ||
9979 				    class == BPF_JMP32) {
9980 					verbose(env, "BPF_CALL uses reserved fields\n");
9981 					return -EINVAL;
9982 				}
9983 
9984 				if (env->cur_state->active_spin_lock &&
9985 				    (insn->src_reg == BPF_PSEUDO_CALL ||
9986 				     insn->imm != BPF_FUNC_spin_unlock)) {
9987 					verbose(env, "function calls are not allowed while holding a lock\n");
9988 					return -EINVAL;
9989 				}
9990 				if (insn->src_reg == BPF_PSEUDO_CALL)
9991 					err = check_func_call(env, insn, &env->insn_idx);
9992 				else
9993 					err = check_helper_call(env, insn->imm, env->insn_idx);
9994 				if (err)
9995 					return err;
9996 
9997 			} else if (opcode == BPF_JA) {
9998 				if (BPF_SRC(insn->code) != BPF_K ||
9999 				    insn->imm != 0 ||
10000 				    insn->src_reg != BPF_REG_0 ||
10001 				    insn->dst_reg != BPF_REG_0 ||
10002 				    class == BPF_JMP32) {
10003 					verbose(env, "BPF_JA uses reserved fields\n");
10004 					return -EINVAL;
10005 				}
10006 
10007 				env->insn_idx += insn->off + 1;
10008 				continue;
10009 
10010 			} else if (opcode == BPF_EXIT) {
10011 				if (BPF_SRC(insn->code) != BPF_K ||
10012 				    insn->imm != 0 ||
10013 				    insn->src_reg != BPF_REG_0 ||
10014 				    insn->dst_reg != BPF_REG_0 ||
10015 				    class == BPF_JMP32) {
10016 					verbose(env, "BPF_EXIT uses reserved fields\n");
10017 					return -EINVAL;
10018 				}
10019 
10020 				if (env->cur_state->active_spin_lock) {
10021 					verbose(env, "bpf_spin_unlock is missing\n");
10022 					return -EINVAL;
10023 				}
10024 
10025 				if (state->curframe) {
10026 					/* exit from nested function */
10027 					err = prepare_func_exit(env, &env->insn_idx);
10028 					if (err)
10029 						return err;
10030 					do_print_state = true;
10031 					continue;
10032 				}
10033 
10034 				err = check_reference_leak(env);
10035 				if (err)
10036 					return err;
10037 
10038 				err = check_return_code(env);
10039 				if (err)
10040 					return err;
10041 process_bpf_exit:
10042 				update_branch_counts(env, env->cur_state);
10043 				err = pop_stack(env, &prev_insn_idx,
10044 						&env->insn_idx, pop_log);
10045 				if (err < 0) {
10046 					if (err != -ENOENT)
10047 						return err;
10048 					break;
10049 				} else {
10050 					do_print_state = true;
10051 					continue;
10052 				}
10053 			} else {
10054 				err = check_cond_jmp_op(env, insn, &env->insn_idx);
10055 				if (err)
10056 					return err;
10057 			}
10058 		} else if (class == BPF_LD) {
10059 			u8 mode = BPF_MODE(insn->code);
10060 
10061 			if (mode == BPF_ABS || mode == BPF_IND) {
10062 				err = check_ld_abs(env, insn);
10063 				if (err)
10064 					return err;
10065 
10066 			} else if (mode == BPF_IMM) {
10067 				err = check_ld_imm(env, insn);
10068 				if (err)
10069 					return err;
10070 
10071 				env->insn_idx++;
10072 				sanitize_mark_insn_seen(env);
10073 			} else {
10074 				verbose(env, "invalid BPF_LD mode\n");
10075 				return -EINVAL;
10076 			}
10077 		} else {
10078 			verbose(env, "unknown insn class %d\n", class);
10079 			return -EINVAL;
10080 		}
10081 
10082 		env->insn_idx++;
10083 	}
10084 
10085 	return 0;
10086 }
10087 
10088 /* replace pseudo btf_id with kernel symbol address */
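/* For example (simplified): an 'extern ... __ksym;' variable is emitted by a
 * libbpf-style loader as a ldimm64 with src_reg == BPF_PSEUDO_BTF_ID and
 * imm == the BTF id of the kernel VAR. The function below looks the symbol
 * up via kallsyms, stores its address in the imm pair and types the result
 * as PTR_TO_BTF_ID, PTR_TO_MEM or PTR_TO_PERCPU_BTF_ID as appropriate.
 */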
10089 static int check_pseudo_btf_id(struct bpf_verifier_env *env,
10090 			       struct bpf_insn *insn,
10091 			       struct bpf_insn_aux_data *aux)
10092 {
10093 	const struct btf_var_secinfo *vsi;
10094 	const struct btf_type *datasec;
10095 	const struct btf_type *t;
10096 	const char *sym_name;
10097 	bool percpu = false;
10098 	u32 type, id = insn->imm;
10099 	s32 datasec_id;
10100 	u64 addr;
10101 	int i;
10102 
10103 	if (!btf_vmlinux) {
10104 		verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
10105 		return -EINVAL;
10106 	}
10107 
10108 	if (insn[1].imm != 0) {
10109 		verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
10110 		return -EINVAL;
10111 	}
10112 
10113 	t = btf_type_by_id(btf_vmlinux, id);
10114 	if (!t) {
10115 		verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
10116 		return -ENOENT;
10117 	}
10118 
10119 	if (!btf_type_is_var(t)) {
10120 		verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n",
10121 			id);
10122 		return -EINVAL;
10123 	}
10124 
10125 	sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
10126 	addr = kallsyms_lookup_name(sym_name);
10127 	if (!addr) {
10128 		verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n",
10129 			sym_name);
10130 		return -ENOENT;
10131 	}
10132 
10133 	datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu",
10134 					   BTF_KIND_DATASEC);
10135 	if (datasec_id > 0) {
10136 		datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10137 		for_each_vsi(i, datasec, vsi) {
10138 			if (vsi->type == id) {
10139 				percpu = true;
10140 				break;
10141 			}
10142 		}
10143 	}
10144 
10145 	insn[0].imm = (u32)addr;
10146 	insn[1].imm = addr >> 32;
10147 
10148 	type = t->type;
10149 	t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10150 	if (percpu) {
10151 		aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10152 		aux->btf_var.btf_id = type;
10153 	} else if (!btf_type_is_struct(t)) {
10154 		const struct btf_type *ret;
10155 		const char *tname;
10156 		u32 tsize;
10157 
10158 		/* resolve the type size of ksym. */
10159 		ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10160 		if (IS_ERR(ret)) {
10161 			tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10162 			verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n",
10163 				tname, PTR_ERR(ret));
10164 			return -EINVAL;
10165 		}
10166 		aux->btf_var.reg_type = PTR_TO_MEM;
10167 		aux->btf_var.mem_size = tsize;
10168 	} else {
10169 		aux->btf_var.reg_type = PTR_TO_BTF_ID;
10170 		aux->btf_var.btf_id = type;
10171 	}
10172 	return 0;
10173 }
10174 
10175 static int check_map_prealloc(struct bpf_map *map)
10176 {
10177 	return (map->map_type != BPF_MAP_TYPE_HASH &&
10178 		map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10179 		map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10180 		!(map->map_flags & BPF_F_NO_PREALLOC);
10181 }
10182 
10183 static bool is_tracing_prog_type(enum bpf_prog_type type)
10184 {
10185 	switch (type) {
10186 	case BPF_PROG_TYPE_KPROBE:
10187 	case BPF_PROG_TYPE_TRACEPOINT:
10188 	case BPF_PROG_TYPE_PERF_EVENT:
10189 	case BPF_PROG_TYPE_RAW_TRACEPOINT:
10190 		return true;
10191 	default:
10192 		return false;
10193 	}
10194 }
10195 
10196 static bool is_preallocated_map(struct bpf_map *map)
10197 {
10198 	if (!check_map_prealloc(map))
10199 		return false;
10200 	if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta))
10201 		return false;
10202 	return true;
10203 }
10204 
10205 static int check_map_prog_compatibility(struct bpf_verifier_env *env,
10206 					struct bpf_map *map,
10207 					struct bpf_prog *prog)
10208 
10209 {
10210 	enum bpf_prog_type prog_type = resolve_prog_type(prog);
10211 	/*
10212 	 * Validate that trace type programs use preallocated hash maps.
10213 	 *
10214 	 * For programs attached to PERF events this is mandatory as the
10215 	 * perf NMI can hit any arbitrary code sequence.
10216 	 *
10217 	 * All other trace types using non-preallocated hash maps are unsafe
10218 	 * as well because tracepoints or kprobes can be inside locked regions
10219 	 * of the memory allocator or at a place where a recursion into the
10220 	 * memory allocator would see inconsistent state.
10221 	 *
10222 	 * On RT enabled kernels run-time allocation of all trace type
10223 	 * programs is strictly prohibited due to lock type constraints. On
10224 	 * !RT kernels it is allowed for backwards compatibility reasons for
10225 	 * now, but warnings are emitted so developers are made aware of
10226 	 * the unsafety and can fix their programs before this is enforced.
10227 	 */
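	/* For illustration (a libbpf-style map definition, not from this file):
	 *
	 *     struct {
	 *             __uint(type, BPF_MAP_TYPE_HASH);
	 *             __uint(max_entries, 1024);
	 *             __type(key, u32);
	 *             __type(value, u64);
	 *     } counts SEC(".maps");
	 *
	 * is preallocated by default and passes the checks below; adding
	 * __uint(map_flags, BPF_F_NO_PREALLOC) makes it run-time allocated
	 * and triggers them for tracing programs.
	 */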
10228 	if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10229 		if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10230 			verbose(env, "perf_event programs can only use preallocated hash map\n");
10231 			return -EINVAL;
10232 		}
10233 		if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10234 			verbose(env, "trace type programs can only use preallocated hash map\n");
10235 			return -EINVAL;
10236 		}
10237 		WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10238 		verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10239 	}
10240 
10241 	if ((is_tracing_prog_type(prog_type) ||
10242 	     prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
10243 	    map_value_has_spin_lock(map)) {
10244 		verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10245 		return -EINVAL;
10246 	}
10247 
10248 	if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) &&
10249 	    !bpf_offload_prog_map_match(prog, map)) {
10250 		verbose(env, "offload device mismatch between prog and map\n");
10251 		return -EINVAL;
10252 	}
10253 
10254 	if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10255 		verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10256 		return -EINVAL;
10257 	}
10258 
10259 	if (prog->aux->sleepable)
10260 		switch (map->map_type) {
10261 		case BPF_MAP_TYPE_HASH:
10262 		case BPF_MAP_TYPE_LRU_HASH:
10263 		case BPF_MAP_TYPE_ARRAY:
10264 			if (!is_preallocated_map(map)) {
10265 				verbose(env,
10266 					"Sleepable programs can only use preallocated hash maps\n");
10267 				return -EINVAL;
10268 			}
10269 			break;
10270 		default:
10271 			verbose(env,
10272 				"Sleepable programs can only use array and hash maps\n");
10273 			return -EINVAL;
10274 		}
10275 
10276 	return 0;
10277 }
10278 
10279 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10280 {
10281 	return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE ||
10282 		map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10283 }
10284 
10285 /* find and rewrite pseudo imm in ld_imm64 instructions:
10286  *
10287  * 1. if it accesses map FD, replace it with actual map pointer.
10288  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10289  *
10290  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10291  */
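/* For example (simplified): a loader emits the two-insn ld_imm64 pair
 *
 *     insn[0]: BPF_LD | BPF_DW | BPF_IMM, src_reg = BPF_PSEUDO_MAP_FD,
 *              imm = user-space map fd
 *     insn[1]: imm = 0
 *
 * and the pass below replaces the imm pair with the low/high 32 bits of the
 * in-kernel 'struct bpf_map *', so later stages see a real map pointer
 * instead of an fd.
 */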
10292 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10293 {
10294 	struct bpf_insn *insn = env->prog->insnsi;
10295 	int insn_cnt = env->prog->len;
10296 	int i, j, err;
10297 
10298 	err = bpf_prog_calc_tag(env->prog);
10299 	if (err)
10300 		return err;
10301 
10302 	for (i = 0; i < insn_cnt; i++, insn++) {
10303 		if (BPF_CLASS(insn->code) == BPF_LDX &&
10304 		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10305 			verbose(env, "BPF_LDX uses reserved fields\n");
10306 			return -EINVAL;
10307 		}
10308 
10309 		if (BPF_CLASS(insn->code) == BPF_STX &&
10310 		    ((BPF_MODE(insn->code) != BPF_MEM &&
10311 		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10312 			verbose(env, "BPF_STX uses reserved fields\n");
10313 			return -EINVAL;
10314 		}
10315 
10316 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10317 			struct bpf_insn_aux_data *aux;
10318 			struct bpf_map *map;
10319 			struct fd f;
10320 			u64 addr;
10321 
10322 			if (i == insn_cnt - 1 || insn[1].code != 0 ||
10323 			    insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10324 			    insn[1].off != 0) {
10325 				verbose(env, "invalid bpf_ld_imm64 insn\n");
10326 				return -EINVAL;
10327 			}
10328 
10329 			if (insn[0].src_reg == 0)
10330 				/* valid generic load 64-bit imm */
10331 				goto next_insn;
10332 
10333 			if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10334 				aux = &env->insn_aux_data[i];
10335 				err = check_pseudo_btf_id(env, insn, aux);
10336 				if (err)
10337 					return err;
10338 				goto next_insn;
10339 			}
10340 
10341 			/* In final convert_pseudo_ld_imm64() step, this is
10342 			 * converted into regular 64-bit imm load insn.
10343 			 */
10344 			if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD &&
10345 			     insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10346 			    (insn[0].src_reg == BPF_PSEUDO_MAP_FD &&
10347 			     insn[1].imm != 0)) {
10348 				verbose(env,
10349 					"unrecognized bpf_ld_imm64 insn\n");
10350 				return -EINVAL;
10351 			}
10352 
10353 			f = fdget(insn[0].imm);
10354 			map = __bpf_map_get(f);
10355 			if (IS_ERR(map)) {
10356 				verbose(env, "fd %d is not pointing to valid bpf_map\n",
10357 					insn[0].imm);
10358 				return PTR_ERR(map);
10359 			}
10360 
10361 			err = check_map_prog_compatibility(env, map, env->prog);
10362 			if (err) {
10363 				fdput(f);
10364 				return err;
10365 			}
10366 
10367 			aux = &env->insn_aux_data[i];
10368 			if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10369 				addr = (unsigned long)map;
10370 			} else {
10371 				u32 off = insn[1].imm;
10372 
10373 				if (off >= BPF_MAX_VAR_OFF) {
10374 					verbose(env, "direct value offset of %u is not allowed\n", off);
10375 					fdput(f);
10376 					return -EINVAL;
10377 				}
10378 
10379 				if (!map->ops->map_direct_value_addr) {
10380 					verbose(env, "no direct value access support for this map type\n");
10381 					fdput(f);
10382 					return -EINVAL;
10383 				}
10384 
10385 				err = map->ops->map_direct_value_addr(map, &addr, off);
10386 				if (err) {
10387 					verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n",
10388 						map->value_size, off);
10389 					fdput(f);
10390 					return err;
10391 				}
10392 
10393 				aux->map_off = off;
10394 				addr += off;
10395 			}
10396 
10397 			insn[0].imm = (u32)addr;
10398 			insn[1].imm = addr >> 32;
10399 
10400 			/* check whether we recorded this map already */
10401 			for (j = 0; j < env->used_map_cnt; j++) {
10402 				if (env->used_maps[j] == map) {
10403 					aux->map_index = j;
10404 					fdput(f);
10405 					goto next_insn;
10406 				}
10407 			}
10408 
10409 			if (env->used_map_cnt >= MAX_USED_MAPS) {
10410 				fdput(f);
10411 				return -E2BIG;
10412 			}
10413 
10414 			/* hold the map. If the program is rejected by verifier,
10415 			 * the map will be released by release_maps() or it
10416 			 * will be used by the valid program until it's unloaded
10417 			 * and all maps are released in free_used_maps()
10418 			 */
10419 			bpf_map_inc(map);
10420 
10421 			aux->map_index = env->used_map_cnt;
10422 			env->used_maps[env->used_map_cnt++] = map;
10423 
10424 			if (bpf_map_is_cgroup_storage(map) &&
10425 			    bpf_cgroup_storage_assign(env->prog->aux, map)) {
10426 				verbose(env, "only one cgroup storage of each type is allowed\n");
10427 				fdput(f);
10428 				return -EBUSY;
10429 			}
10430 
10431 			fdput(f);
10432 next_insn:
10433 			insn++;
10434 			i++;
10435 			continue;
10436 		}
10437 
10438 		/* Basic sanity check before we invest more work here. */
10439 		if (!bpf_opcode_in_insntable(insn->code)) {
10440 			verbose(env, "unknown opcode %02x\n", insn->code);
10441 			return -EINVAL;
10442 		}
10443 	}
10444 
10445 	/* now all pseudo BPF_LD_IMM64 instructions load valid
10446 	 * 'struct bpf_map *' into a register instead of user map_fd.
10447 	 * These pointers will be used later by verifier to validate map access.
10448 	 */
10449 	return 0;
10450 }
10451 
10452 /* drop refcnt of maps used by the rejected program */
10453 static void release_maps(struct bpf_verifier_env *env)
10454 {
10455 	__bpf_free_used_maps(env->prog->aux, env->used_maps,
10456 			     env->used_map_cnt);
10457 }
10458 
10459 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10460 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10461 {
10462 	struct bpf_insn *insn = env->prog->insnsi;
10463 	int insn_cnt = env->prog->len;
10464 	int i;
10465 
10466 	for (i = 0; i < insn_cnt; i++, insn++)
10467 		if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
10468 			insn->src_reg = 0;
10469 }
10470 
10471 /* single env->prog->insnsi[off] instruction was replaced with the range
10472  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
10473  * [0, off) and [off, end) to new locations, so the patched range stays zero
10474  */
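/* For example (made-up numbers): patching the single insn at off=5 with a
 * 3-insn sequence (cnt=3) keeps aux[0..4] unchanged, shifts the old
 * aux[5..] so it now starts at index 7, and fills the two fresh slots 5
 * and 6 with the 'seen' flag of the original aux[5].
 */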
10475 static void adjust_insn_aux_data(struct bpf_verifier_env *env,
10476 				 struct bpf_insn_aux_data *new_data,
10477 				 struct bpf_prog *new_prog, u32 off, u32 cnt)
10478 {
10479 	struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10480 	struct bpf_insn *insn = new_prog->insnsi;
10481 	u32 old_seen = old_data[off].seen;
10482 	u32 prog_len;
10483 	int i;
10484 
10485 	/* aux info at OFF always needs adjustment, no matter whether the fast
10486 	 * path (cnt == 1) is taken or not. There is no guarantee the insn at OFF
10487 	 * is the original insn from the old prog.
10488 	 */
10489 	old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10490 
10491 	if (cnt == 1)
10492 		return;
10493 	prog_len = new_prog->len;
10494 
10495 	memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10496 	memcpy(new_data + off + cnt - 1, old_data + off,
10497 	       sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10498 	for (i = off; i < off + cnt - 1; i++) {
10499 		/* Expand insni[off]'s seen count to the patched range. */
10500 		new_data[i].seen = old_seen;
10501 		new_data[i].zext_dst = insn_has_def32(env, insn + i);
10502 	}
10503 	env->insn_aux_data = new_data;
10504 	vfree(old_data);
10505 }
10506 
10507 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10508 {
10509 	int i;
10510 
10511 	if (len == 1)
10512 		return;
10513 	/* NOTE: fake 'exit' subprog should be updated as well. */
10514 	for (i = 0; i <= env->subprog_cnt; i++) {
10515 		if (env->subprog_info[i].start <= off)
10516 			continue;
10517 		env->subprog_info[i].start += len - 1;
10518 	}
10519 }
10520 
10521 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10522 {
10523 	struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10524 	int i, sz = prog->aux->size_poke_tab;
10525 	struct bpf_jit_poke_descriptor *desc;
10526 
10527 	for (i = 0; i < sz; i++) {
10528 		desc = &tab[i];
10529 		if (desc->insn_idx <= off)
10530 			continue;
10531 		desc->insn_idx += len - 1;
10532 	}
10533 }
10534 
10535 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
10536 					    const struct bpf_insn *patch, u32 len)
10537 {
10538 	struct bpf_prog *new_prog;
10539 	struct bpf_insn_aux_data *new_data = NULL;
10540 
10541 	if (len > 1) {
10542 		new_data = vzalloc(array_size(env->prog->len + len - 1,
10543 					      sizeof(struct bpf_insn_aux_data)));
10544 		if (!new_data)
10545 			return NULL;
10546 	}
10547 
10548 	new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10549 	if (IS_ERR(new_prog)) {
10550 		if (PTR_ERR(new_prog) == -ERANGE)
10551 			verbose(env,
10552 				"insn %d cannot be patched due to 16-bit range\n",
10553 				env->insn_aux_data[off].orig_idx);
10554 		vfree(new_data);
10555 		return NULL;
10556 	}
10557 	adjust_insn_aux_data(env, new_data, new_prog, off, len);
10558 	adjust_subprog_starts(env, off, len);
10559 	adjust_poke_descs(new_prog, off, len);
10560 	return new_prog;
10561 }
10562 
10563 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
10564 					      u32 off, u32 cnt)
10565 {
10566 	int i, j;
10567 
10568 	/* find first prog starting at or after off (first to remove) */
10569 	for (i = 0; i < env->subprog_cnt; i++)
10570 		if (env->subprog_info[i].start >= off)
10571 			break;
10572 	/* find first prog starting at or after off + cnt (first to stay) */
10573 	for (j = i; j < env->subprog_cnt; j++)
10574 		if (env->subprog_info[j].start >= off + cnt)
10575 			break;
10576 	/* if j doesn't start exactly at off + cnt, we are just removing
10577 	 * the front of previous prog
10578 	 */
10579 	if (env->subprog_info[j].start != off + cnt)
10580 		j--;
10581 
10582 	if (j > i) {
10583 		struct bpf_prog_aux *aux = env->prog->aux;
10584 		int move;
10585 
10586 		/* move fake 'exit' subprog as well */
10587 		move = env->subprog_cnt + 1 - j;
10588 
10589 		memmove(env->subprog_info + i,
10590 			env->subprog_info + j,
10591 			sizeof(*env->subprog_info) * move);
10592 		env->subprog_cnt -= j - i;
10593 
10594 		/* remove func_info */
10595 		if (aux->func_info) {
10596 			move = aux->func_info_cnt - j;
10597 
10598 			memmove(aux->func_info + i,
10599 				aux->func_info + j,
10600 				sizeof(*aux->func_info) * move);
10601 			aux->func_info_cnt -= j - i;
10602 			/* func_info->insn_off is set after all code rewrites,
10603 			 * in adjust_btf_func() - no need to adjust
10604 			 */
10605 		}
10606 	} else {
10607 		/* convert i from "first prog to remove" to "first to adjust" */
10608 		if (env->subprog_info[i].start == off)
10609 			i++;
10610 	}
10611 
10612 	/* update fake 'exit' subprog as well */
10613 	for (; i <= env->subprog_cnt; i++)
10614 		env->subprog_info[i].start -= cnt;
10615 
10616 	return 0;
10617 }
10618 
10619 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off,
10620 				      u32 cnt)
10621 {
10622 	struct bpf_prog *prog = env->prog;
10623 	u32 i, l_off, l_cnt, nr_linfo;
10624 	struct bpf_line_info *linfo;
10625 
10626 	nr_linfo = prog->aux->nr_linfo;
10627 	if (!nr_linfo)
10628 		return 0;
10629 
10630 	linfo = prog->aux->linfo;
10631 
10632 	/* find first line info to remove, count lines to be removed */
10633 	for (i = 0; i < nr_linfo; i++)
10634 		if (linfo[i].insn_off >= off)
10635 			break;
10636 
10637 	l_off = i;
10638 	l_cnt = 0;
10639 	for (; i < nr_linfo; i++)
10640 		if (linfo[i].insn_off < off + cnt)
10641 			l_cnt++;
10642 		else
10643 			break;
10644 
10645 	/* If the first live insn doesn't match the first live linfo, it "inherits"
10646 	 * the last removed linfo.  prog is already modified, so prog->len == off
10647 	 * means there are no live instructions after it (the program tail was removed).
10648 	 */
10649 	if (prog->len != off && l_cnt &&
10650 	    (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10651 		l_cnt--;
10652 		linfo[--i].insn_off = off + cnt;
10653 	}
10654 
10655 	/* remove the line info which refer to the removed instructions */
10656 	if (l_cnt) {
10657 		memmove(linfo + l_off, linfo + i,
10658 			sizeof(*linfo) * (nr_linfo - i));
10659 
10660 		prog->aux->nr_linfo -= l_cnt;
10661 		nr_linfo = prog->aux->nr_linfo;
10662 	}
10663 
10664 	/* pull all linfo[i].insn_off >= off + cnt in by cnt */
10665 	for (i = l_off; i < nr_linfo; i++)
10666 		linfo[i].insn_off -= cnt;
10667 
10668 	/* fix up all subprogs (incl. 'exit') which start >= off */
10669 	for (i = 0; i <= env->subprog_cnt; i++)
10670 		if (env->subprog_info[i].linfo_idx > l_off) {
10671 			/* program may have started in the removed region but
10672 			 * may not be fully removed
10673 			 */
10674 			if (env->subprog_info[i].linfo_idx >= l_off + l_cnt)
10675 				env->subprog_info[i].linfo_idx -= l_cnt;
10676 			else
10677 				env->subprog_info[i].linfo_idx = l_off;
10678 		}
10679 
10680 	return 0;
10681 }
10682 
10683 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10684 {
10685 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10686 	unsigned int orig_prog_len = env->prog->len;
10687 	int err;
10688 
10689 	if (bpf_prog_is_dev_bound(env->prog->aux))
10690 		bpf_prog_offload_remove_insns(env, off, cnt);
10691 
10692 	err = bpf_remove_insns(env->prog, off, cnt);
10693 	if (err)
10694 		return err;
10695 
10696 	err = adjust_subprog_starts_after_remove(env, off, cnt);
10697 	if (err)
10698 		return err;
10699 
10700 	err = bpf_adj_linfo_after_remove(env, off, cnt);
10701 	if (err)
10702 		return err;
10703 
10704 	memmove(aux_data + off,	aux_data + off + cnt,
10705 		sizeof(*aux_data) * (orig_prog_len - off - cnt));
10706 
10707 	return 0;
10708 }
10709 
10710 /* The verifier does more data flow analysis than llvm and will not
10711  * explore branches that are dead at run time. Malicious programs can
10712  * have dead code too. Therefore replace all dead at-run-time code
10713  * with 'ja -1'.
10714  *
10715  * Plain nops are not optimal: if they sat at the end of the program
10716  * and through another bug we managed to jump there, we would execute
10717  * beyond program memory. Returning an exception code also wouldn't
10718  * work, since we can have subprogs where the dead code could be
10719  * located.
10720  */
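/* For example (made up): if insn 7 below was never marked as seen
 *
 *     6: r0 = 0
 *     7: r0 = 42        ; dead, not reachable on any verified path
 *     8: exit
 *
 * it is overwritten with 'ja -1', a jump back onto itself, so even a
 * stray jump into it can never run off the end of the program.
 */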
10721 static void sanitize_dead_code(struct bpf_verifier_env *env)
10722 {
10723 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10724 	struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10725 	struct bpf_insn *insn = env->prog->insnsi;
10726 	const int insn_cnt = env->prog->len;
10727 	int i;
10728 
10729 	for (i = 0; i < insn_cnt; i++) {
10730 		if (aux_data[i].seen)
10731 			continue;
10732 		memcpy(insn + i, &trap, sizeof(trap));
10733 		aux_data[i].zext_dst = false;
10734 	}
10735 }
10736 
10737 static bool insn_is_cond_jump(u8 code)
10738 {
10739 	u8 op;
10740 
10741 	if (BPF_CLASS(code) == BPF_JMP32)
10742 		return true;
10743 
10744 	if (BPF_CLASS(code) != BPF_JMP)
10745 		return false;
10746 
10747 	op = BPF_OP(code);
10748 	return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10749 }
10750 
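/* A simplified sketch of the rewrite below: for a conditional jump whose
 * fall-through insn was never seen, the insn becomes an unconditional 'ja'
 * to its original target; if instead the jump target was never seen, it
 * becomes 'ja +0', i.e. a plain fall-through.
 */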
10751 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10752 {
10753 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10754 	struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10755 	struct bpf_insn *insn = env->prog->insnsi;
10756 	const int insn_cnt = env->prog->len;
10757 	int i;
10758 
10759 	for (i = 0; i < insn_cnt; i++, insn++) {
10760 		if (!insn_is_cond_jump(insn->code))
10761 			continue;
10762 
10763 		if (!aux_data[i + 1].seen)
10764 			ja.off = insn->off;
10765 		else if (!aux_data[i + 1 + insn->off].seen)
10766 			ja.off = 0;
10767 		else
10768 			continue;
10769 
10770 		if (bpf_prog_is_dev_bound(env->prog->aux))
10771 			bpf_prog_offload_replace_insn(env, i, &ja);
10772 
10773 		memcpy(insn, &ja, sizeof(ja));
10774 	}
10775 }
10776 
10777 static int opt_remove_dead_code(struct bpf_verifier_env *env)
10778 {
10779 	struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10780 	int insn_cnt = env->prog->len;
10781 	int i, err;
10782 
10783 	for (i = 0; i < insn_cnt; i++) {
10784 		int j;
10785 
10786 		j = 0;
10787 		while (i + j < insn_cnt && !aux_data[i + j].seen)
10788 			j++;
10789 		if (!j)
10790 			continue;
10791 
10792 		err = verifier_remove_insns(env, i, j);
10793 		if (err)
10794 			return err;
10795 		insn_cnt = env->prog->len;
10796 	}
10797 
10798 	return 0;
10799 }
10800 
10801 static int opt_remove_nops(struct bpf_verifier_env *env)
10802 {
10803 	const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10804 	struct bpf_insn *insn = env->prog->insnsi;
10805 	int insn_cnt = env->prog->len;
10806 	int i, err;
10807 
10808 	for (i = 0; i < insn_cnt; i++) {
10809 		if (memcmp(&insn[i], &ja, sizeof(ja)))
10810 			continue;
10811 
10812 		err = verifier_remove_insns(env, i, 1);
10813 		if (err)
10814 			return err;
10815 		insn_cnt--;
10816 		i--;
10817 	}
10818 
10819 	return 0;
10820 }
10821 
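/* A simplified sketch of what the pass below emits: after an insn that
 * defines only the low 32 bits of a register, when the JIT needs an
 * explicit zero extension, a BPF_ZEXT_REG(dst) insn is appended. With the
 * BPF_F_TEST_RND_HI32 test flag the upper 32 bits are instead polluted with
 * a random value (mov AX, rnd; lsh AX, 32; or dst, AX) to flush out missed
 * zero extensions during testing.
 */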
10822 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
10823 					 const union bpf_attr *attr)
10824 {
10825 	struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10826 	struct bpf_insn_aux_data *aux = env->insn_aux_data;
10827 	int i, patch_len, delta = 0, len = env->prog->len;
10828 	struct bpf_insn *insns = env->prog->insnsi;
10829 	struct bpf_prog *new_prog;
10830 	bool rnd_hi32;
10831 
10832 	rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10833 	zext_patch[1] = BPF_ZEXT_REG(0);
10834 	rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10835 	rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10836 	rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10837 	for (i = 0; i < len; i++) {
10838 		int adj_idx = i + delta;
10839 		struct bpf_insn insn;
10840 
10841 		insn = insns[adj_idx];
10842 		if (!aux[adj_idx].zext_dst) {
10843 			u8 code, class;
10844 			u32 imm_rnd;
10845 
10846 			if (!rnd_hi32)
10847 				continue;
10848 
10849 			code = insn.code;
10850 			class = BPF_CLASS(code);
10851 			if (insn_no_def(&insn))
10852 				continue;
10853 
10854 			/* NOTE: arg "reg" (the fourth one) is only used for
10855 			 *       BPF_STX which has been ruled out in above
10856 			 *       check, it is safe to pass NULL here.
10857 			 */
10858 			if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10859 				if (class == BPF_LD &&
10860 				    BPF_MODE(code) == BPF_IMM)
10861 					i++;
10862 				continue;
10863 			}
10864 
10865 			/* ctx load could be transformed into wider load. */
10866 			if (class == BPF_LDX &&
10867 			    aux[adj_idx].ptr_type == PTR_TO_CTX)
10868 				continue;
10869 
10870 			imm_rnd = get_random_int();
10871 			rnd_hi32_patch[0] = insn;
10872 			rnd_hi32_patch[1].imm = imm_rnd;
10873 			rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10874 			patch = rnd_hi32_patch;
10875 			patch_len = 4;
10876 			goto apply_patch_buffer;
10877 		}
10878 
10879 		if (!bpf_jit_needs_zext())
10880 			continue;
10881 
10882 		zext_patch[0] = insn;
10883 		zext_patch[1].dst_reg = insn.dst_reg;
10884 		zext_patch[1].src_reg = insn.dst_reg;
10885 		patch = zext_patch;
10886 		patch_len = 2;
10887 apply_patch_buffer:
10888 		new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10889 		if (!new_prog)
10890 			return -ENOMEM;
10891 		env->prog = new_prog;
10892 		insns = new_prog->insnsi;
10893 		aux = env->insn_aux_data;
10894 		delta += patch_len - 1;
10895 	}
10896 
10897 	return 0;
10898 }
10899 
10900 /* convert load instructions that access fields of a context type into a
10901  * sequence of instructions that access fields of the underlying structure:
10902  *     struct __sk_buff    -> struct sk_buff
10903  *     struct bpf_sock_ops -> struct sock
10904  */
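/* For example (simplified, the exact rewrite is program type specific):
 *
 *     r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten so the load reads the 'len' field of the real in-kernel
 * struct sk_buff that r1 points to, at that field's true offset.
 */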
10905 static int convert_ctx_accesses(struct bpf_verifier_env *env)
10906 {
10907 	const struct bpf_verifier_ops *ops = env->ops;
10908 	int i, cnt, size, ctx_field_size, delta = 0;
10909 	const int insn_cnt = env->prog->len;
10910 	struct bpf_insn insn_buf[16], *insn;
10911 	u32 target_size, size_default, off;
10912 	struct bpf_prog *new_prog;
10913 	enum bpf_access_type type;
10914 	bool is_narrower_load;
10915 
10916 	if (ops->gen_prologue || env->seen_direct_write) {
10917 		if (!ops->gen_prologue) {
10918 			verbose(env, "bpf verifier is misconfigured\n");
10919 			return -EINVAL;
10920 		}
10921 		cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
10922 					env->prog);
10923 		if (cnt >= ARRAY_SIZE(insn_buf)) {
10924 			verbose(env, "bpf verifier is misconfigured\n");
10925 			return -EINVAL;
10926 		} else if (cnt) {
10927 			new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10928 			if (!new_prog)
10929 				return -ENOMEM;
10930 
10931 			env->prog = new_prog;
10932 			delta += cnt - 1;
10933 		}
10934 	}
10935 
10936 	if (bpf_prog_is_dev_bound(env->prog->aux))
10937 		return 0;
10938 
10939 	insn = env->prog->insnsi + delta;
10940 
10941 	for (i = 0; i < insn_cnt; i++, insn++) {
10942 		bpf_convert_ctx_access_t convert_ctx_access;
10943 		bool ctx_access;
10944 
10945 		if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
10946 		    insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10947 		    insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
10948 		    insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10949 			type = BPF_READ;
10950 			ctx_access = true;
10951 		} else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
10952 			   insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10953 			   insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
10954 			   insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10955 			   insn->code == (BPF_ST | BPF_MEM | BPF_B) ||
10956 			   insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10957 			   insn->code == (BPF_ST | BPF_MEM | BPF_W) ||
10958 			   insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10959 			type = BPF_WRITE;
10960 			ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10961 		} else {
10962 			continue;
10963 		}
10964 
10965 		if (type == BPF_WRITE &&
10966 		    env->insn_aux_data[i + delta].sanitize_stack_spill) {
10967 			struct bpf_insn patch[] = {
10968 				*insn,
10969 				BPF_ST_NOSPEC(),
10970 			};
10971 
10972 			cnt = ARRAY_SIZE(patch);
10973 			new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10974 			if (!new_prog)
10975 				return -ENOMEM;
10976 
10977 			delta    += cnt - 1;
10978 			env->prog = new_prog;
10979 			insn      = new_prog->insnsi + i + delta;
10980 			continue;
10981 		}
10982 
10983 		if (!ctx_access)
10984 			continue;
10985 
10986 		switch (env->insn_aux_data[i + delta].ptr_type) {
10987 		case PTR_TO_CTX:
10988 			if (!ops->convert_ctx_access)
10989 				continue;
10990 			convert_ctx_access = ops->convert_ctx_access;
10991 			break;
10992 		case PTR_TO_SOCKET:
10993 		case PTR_TO_SOCK_COMMON:
10994 			convert_ctx_access = bpf_sock_convert_ctx_access;
10995 			break;
10996 		case PTR_TO_TCP_SOCK:
10997 			convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10998 			break;
10999 		case PTR_TO_XDP_SOCK:
11000 			convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
11001 			break;
11002 		case PTR_TO_BTF_ID:
11003 			if (type == BPF_READ) {
11004 				insn->code = BPF_LDX | BPF_PROBE_MEM |
11005 					BPF_SIZE((insn)->code);
11006 				env->prog->aux->num_exentries++;
11007 			} else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
11008 				verbose(env, "Writes through BTF pointers are not allowed\n");
11009 				return -EINVAL;
11010 			}
11011 			continue;
11012 		default:
11013 			continue;
11014 		}
11015 
11016 		ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
11017 		size = BPF_LDST_BYTES(insn);
11018 
11019 		/* If the read access is a narrower load of the field,
11020 		 * convert to a 4/8-byte load, to minimize program type specific
11021 		 * convert_ctx_access changes. If conversion is successful,
11022 		 * we will apply proper mask to the result.
11023 		 */
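		/* e.g. a 1-byte read of a 4-byte ctx field becomes, roughly,
		 *     dst = *(u32 *)(ctx + aligned_off)
		 *     dst >>= shift      // only if the byte is not at offset 0
		 *     dst &= 0xff
		 * where the shift/mask insns are appended further below once
		 * convert_ctx_access() has succeeded.
		 */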
11024 		is_narrower_load = size < ctx_field_size;
11025 		size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
11026 		off = insn->off;
11027 		if (is_narrower_load) {
11028 			u8 size_code;
11029 
11030 			if (type == BPF_WRITE) {
11031 				verbose(env, "bpf verifier narrow ctx access misconfigured\n");
11032 				return -EINVAL;
11033 			}
11034 
11035 			size_code = BPF_H;
11036 			if (ctx_field_size == 4)
11037 				size_code = BPF_W;
11038 			else if (ctx_field_size == 8)
11039 				size_code = BPF_DW;
11040 
11041 			insn->off = off & ~(size_default - 1);
11042 			insn->code = BPF_LDX | BPF_MEM | size_code;
11043 		}
11044 
11045 		target_size = 0;
11046 		cnt = convert_ctx_access(type, insn, insn_buf, env->prog,
11047 					 &target_size);
11048 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) ||
11049 		    (ctx_field_size && !target_size)) {
11050 			verbose(env, "bpf verifier is misconfigured\n");
11051 			return -EINVAL;
11052 		}
11053 
11054 		if (is_narrower_load && size < target_size) {
11055 			u8 shift = bpf_ctx_narrow_access_offset(
11056 				off, size, size_default) * 8;
11057 			if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
11058 				verbose(env, "bpf verifier narrow ctx load misconfigured\n");
11059 				return -EINVAL;
11060 			}
11061 			if (ctx_field_size <= 4) {
11062 				if (shift)
11063 					insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH,
11064 									insn->dst_reg,
11065 									shift);
11066 				insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg,
11067 								(1 << size * 8) - 1);
11068 			} else {
11069 				if (shift)
11070 					insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH,
11071 									insn->dst_reg,
11072 									shift);
11073 				insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg,
11074 								(1ULL << size * 8) - 1);
11075 			}
11076 		}
11077 
11078 		new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11079 		if (!new_prog)
11080 			return -ENOMEM;
11081 
11082 		delta += cnt - 1;
11083 
11084 		/* keep walking new program and skip insns we just inserted */
11085 		env->prog = new_prog;
11086 		insn      = new_prog->insnsi + i + delta;
11087 	}
11088 
11089 	return 0;
11090 }
11091 
11092 static int jit_subprogs(struct bpf_verifier_env *env)
11093 {
11094 	struct bpf_prog *prog = env->prog, **func, *tmp;
11095 	int i, j, subprog_start, subprog_end = 0, len, subprog;
11096 	struct bpf_map *map_ptr;
11097 	struct bpf_insn *insn;
11098 	void *old_bpf_func;
11099 	int err, num_exentries;
11100 
11101 	if (env->subprog_cnt <= 1)
11102 		return 0;
11103 
11104 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11105 		if (insn->code != (BPF_JMP | BPF_CALL) ||
11106 		    insn->src_reg != BPF_PSEUDO_CALL)
11107 			continue;
11108 		/* Upon error here we cannot fall back to interpreter but
11109 		 * need a hard reject of the program. Thus -EFAULT is
11110 		 * propagated in any case.
11111 		 */
11112 		subprog = find_subprog(env, i + insn->imm + 1);
11113 		if (subprog < 0) {
11114 			WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
11115 				  i + insn->imm + 1);
11116 			return -EFAULT;
11117 		}
11118 		/* temporarily remember subprog id inside insn instead of
11119 		 * aux_data, since next loop will split up all insns into funcs
11120 		 */
11121 		insn->off = subprog;
11122 		/* remember original imm in case JIT fails and a fallback
11123 		 * to the interpreter is needed
11124 		 */
11125 		env->insn_aux_data[i].call_imm = insn->imm;
11126 		/* point imm to __bpf_call_base+1 from JITs point of view */
11127 		insn->imm = 1;
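		/* e.g. a pseudo call "call pc+5" targeting the subprog that
		 * starts at insn i+6 is encoded here as off = <subprog index>,
		 * imm = 1; the original imm is kept in call_imm and restored
		 * in out_undo_insn if JITing fails.
		 */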
11128 	}
11129 
11130 	err = bpf_prog_alloc_jited_linfo(prog);
11131 	if (err)
11132 		goto out_undo_insn;
11133 
11134 	err = -ENOMEM;
11135 	func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11136 	if (!func)
11137 		goto out_undo_insn;
11138 
11139 	for (i = 0; i < env->subprog_cnt; i++) {
11140 		subprog_start = subprog_end;
11141 		subprog_end = env->subprog_info[i + 1].start;
11142 
11143 		len = subprog_end - subprog_start;
11144 		/* BPF_PROG_RUN doesn't call subprogs directly,
11145 		 * hence main prog stats include the runtime of subprogs.
11146 		 * subprogs don't have IDs and are not reachable via prog_get_next_id;
11147 		 * func[i]->aux->stats will never be accessed and stays NULL
11148 		 */
11149 		func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11150 		if (!func[i])
11151 			goto out_free;
11152 		memcpy(func[i]->insnsi, &prog->insnsi[subprog_start],
11153 		       len * sizeof(struct bpf_insn));
11154 		func[i]->type = prog->type;
11155 		func[i]->len = len;
11156 		if (bpf_prog_calc_tag(func[i]))
11157 			goto out_free;
11158 		func[i]->is_func = 1;
11159 		func[i]->aux->func_idx = i;
11160 		/* the btf and func_info will be freed only at prog->aux */
11161 		func[i]->aux->btf = prog->aux->btf;
11162 		func[i]->aux->func_info = prog->aux->func_info;
11163 		func[i]->aux->func_info_cnt = prog->aux->func_info_cnt;
11164 
11165 		for (j = 0; j < prog->aux->size_poke_tab; j++) {
11166 			u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
11167 			int ret;
11168 
11169 			if (!(insn_idx >= subprog_start &&
11170 			      insn_idx <= subprog_end))
11171 				continue;
11172 
11173 			ret = bpf_jit_add_poke_descriptor(func[i],
11174 							  &prog->aux->poke_tab[j]);
11175 			if (ret < 0) {
11176 				verbose(env, "adding tail call poke descriptor failed\n");
11177 				goto out_free;
11178 			}
11179 
11180 			func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
11181 
11182 			map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
11183 			ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
11184 			if (ret < 0) {
11185 				verbose(env, "tracking tail call prog failed\n");
11186 				goto out_free;
11187 			}
11188 		}
11189 
11190 		func[i]->aux->name[0] = 'F';
11191 		func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11192 		func[i]->jit_requested = 1;
11193 		func[i]->aux->linfo = prog->aux->linfo;
11194 		func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11195 		func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11196 		func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11197 		num_exentries = 0;
11198 		insn = func[i]->insnsi;
11199 		for (j = 0; j < func[i]->len; j++, insn++) {
11200 			if (BPF_CLASS(insn->code) == BPF_LDX &&
11201 			    BPF_MODE(insn->code) == BPF_PROBE_MEM)
11202 				num_exentries++;
11203 		}
11204 		func[i]->aux->num_exentries = num_exentries;
11205 		func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11206 		func[i] = bpf_int_jit_compile(func[i]);
11207 		if (!func[i]->jited) {
11208 			err = -ENOTSUPP;
11209 			goto out_free;
11210 		}
11211 		cond_resched();
11212 	}
11213 
11214 	/* Untrack main program's aux structs so that during map_poke_run()
11215 	 * we will not stumble upon the unfilled poke descriptors; each
11216 	 * of the main program's poke descs got distributed across subprogs
11217 	 * and got tracked onto map, so we are sure that none of them will
11218 	 * be missed after the operation below
11219 	 */
11220 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
11221 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
11222 
11223 		map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11224 	}
11225 
11226 	/* at this point all bpf functions were successfully JITed
11227 	 * now populate all bpf_calls with correct addresses and
11228 	 * run last pass of JIT
11229 	 */
11230 	for (i = 0; i < env->subprog_cnt; i++) {
11231 		insn = func[i]->insnsi;
11232 		for (j = 0; j < func[i]->len; j++, insn++) {
11233 			if (insn->code != (BPF_JMP | BPF_CALL) ||
11234 			    insn->src_reg != BPF_PSEUDO_CALL)
11235 				continue;
11236 			subprog = insn->off;
11237 			insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) -
11238 				    __bpf_call_base;
11239 		}
11240 
11241 		/* we use the aux data to keep a list of the start addresses
11242 		 * of the JITed images for each function in the program
11243 		 *
11244 		 * for some architectures, such as powerpc64, the imm field
11245 		 * might not be large enough to hold the offset of the start
11246 		 * address of the callee's JITed image from __bpf_call_base
11247 		 *
11248 		 * in such cases, we can lookup the start address of a callee
11249 		 * by using its subprog id, available from the off field of
11250 		 * the call instruction, as an index for this list
11251 		 */
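		/* e.g. a JIT on such an architecture can emit the bpf2bpf call
		 * roughly as
		 *     addr = prog->aux->func[insn->off]->bpf_func;
		 *     emit_call(addr);
		 * instead of reconstructing __bpf_call_base + insn->imm.
		 */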
11252 		func[i]->aux->func = func;
11253 		func[i]->aux->func_cnt = env->subprog_cnt;
11254 	}
11255 	for (i = 0; i < env->subprog_cnt; i++) {
11256 		old_bpf_func = func[i]->bpf_func;
11257 		tmp = bpf_int_jit_compile(func[i]);
11258 		if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11259 			verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11260 			err = -ENOTSUPP;
11261 			goto out_free;
11262 		}
11263 		cond_resched();
11264 	}
11265 
11266 	/* finally lock prog and jit images for all functions and
11267 	 * populate kallsyms
11268 	 */
11269 	for (i = 0; i < env->subprog_cnt; i++) {
11270 		bpf_prog_lock_ro(func[i]);
11271 		bpf_prog_kallsyms_add(func[i]);
11272 	}
11273 
11274 	/* Last step: make now unused interpreter insns from main
11275 	 * prog consistent for later dump requests, so they can
11276 	 * later look the same as if they were interpreted only.
11277 	 */
11278 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11279 		if (insn->code != (BPF_JMP | BPF_CALL) ||
11280 		    insn->src_reg != BPF_PSEUDO_CALL)
11281 			continue;
11282 		insn->off = env->insn_aux_data[i].call_imm;
11283 		subprog = find_subprog(env, i + insn->off + 1);
11284 		insn->imm = subprog;
11285 	}
11286 
11287 	prog->jited = 1;
11288 	prog->bpf_func = func[0]->bpf_func;
11289 	prog->aux->func = func;
11290 	prog->aux->func_cnt = env->subprog_cnt;
11291 	bpf_prog_free_unused_jited_linfo(prog);
11292 	return 0;
11293 out_free:
11294 	for (i = 0; i < env->subprog_cnt; i++) {
11295 		if (!func[i])
11296 			continue;
11297 
11298 		for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
11299 			map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
11300 			map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
11301 		}
11302 		bpf_jit_free(func[i]);
11303 	}
11304 	kfree(func);
11305 out_undo_insn:
11306 	/* cleanup main prog to be interpreted */
11307 	prog->jit_requested = 0;
11308 	for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11309 		if (insn->code != (BPF_JMP | BPF_CALL) ||
11310 		    insn->src_reg != BPF_PSEUDO_CALL)
11311 			continue;
11312 		insn->off = 0;
11313 		insn->imm = env->insn_aux_data[i].call_imm;
11314 	}
11315 	bpf_prog_free_jited_linfo(prog);
11316 	return err;
11317 }
11318 
11319 static int fixup_call_args(struct bpf_verifier_env *env)
11320 {
11321 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11322 	struct bpf_prog *prog = env->prog;
11323 	struct bpf_insn *insn = prog->insnsi;
11324 	int i, depth;
11325 #endif
11326 	int err = 0;
11327 
11328 	if (env->prog->jit_requested &&
11329 	    !bpf_prog_is_dev_bound(env->prog->aux)) {
11330 		err = jit_subprogs(env);
11331 		if (err == 0)
11332 			return 0;
11333 		if (err == -EFAULT)
11334 			return err;
11335 	}
11336 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11337 	if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11338 		/* When JIT fails the progs with bpf2bpf calls and tail_calls
11339 		 * have to be rejected, since interpreter doesn't support them yet.
11340 		 */
11341 		verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11342 		return -EINVAL;
11343 	}
11344 	for (i = 0; i < prog->len; i++, insn++) {
11345 		if (insn->code != (BPF_JMP | BPF_CALL) ||
11346 		    insn->src_reg != BPF_PSEUDO_CALL)
11347 			continue;
11348 		depth = get_callee_stack_depth(env, insn, i);
11349 		if (depth < 0)
11350 			return depth;
11351 		bpf_patch_call_args(insn, depth);
11352 	}
11353 	err = 0;
11354 #endif
11355 	return err;
11356 }
11357 
11358 /* fixup insn->imm field of bpf_call instructions
11359  * and inline eligible helpers as explicit sequence of BPF instructions
11360  *
11361  * this function is called after eBPF program passed verification
11362  */
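/* e.g. a helper call
 *     call bpf_map_lookup_elem
 * either gets inlined via the map's ->map_gen_lookup() callback into a
 * short sequence computing the element address directly, or has its
 * insn->imm rewritten to fn->func - __bpf_call_base so the interpreter
 * and JITs can reach the real in-kernel function.
 */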
11363 static int fixup_bpf_calls(struct bpf_verifier_env *env)
11364 {
11365 	struct bpf_prog *prog = env->prog;
11366 	bool expect_blinding = bpf_jit_blinding_enabled(prog);
11367 	struct bpf_insn *insn = prog->insnsi;
11368 	const struct bpf_func_proto *fn;
11369 	const int insn_cnt = prog->len;
11370 	const struct bpf_map_ops *ops;
11371 	struct bpf_insn_aux_data *aux;
11372 	struct bpf_insn insn_buf[16];
11373 	struct bpf_prog *new_prog;
11374 	struct bpf_map *map_ptr;
11375 	int i, ret, cnt, delta = 0;
11376 
11377 	for (i = 0; i < insn_cnt; i++, insn++) {
11378 		if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) ||
11379 		    insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11380 		    insn->code == (BPF_ALU | BPF_MOD | BPF_X) ||
11381 		    insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11382 			bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11383 			bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11384 			struct bpf_insn *patchlet;
11385 			struct bpf_insn chk_and_div[] = {
11386 				/* [R,W]x div 0 -> 0 */
11387 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11388 					     BPF_JNE | BPF_K, insn->src_reg,
11389 					     0, 2, 0),
11390 				BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11391 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11392 				*insn,
11393 			};
11394 			struct bpf_insn chk_and_mod[] = {
11395 				/* [R,W]x mod 0 -> [R,W]x */
11396 				BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) |
11397 					     BPF_JEQ | BPF_K, insn->src_reg,
11398 					     0, 1 + (is64 ? 0 : 1), 0),
11399 				*insn,
11400 				BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11401 				BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11402 			};
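			/* Roughly: chk_and_div jumps over the div when the
			 * divisor is 0 and zeroes dst instead; chk_and_mod
			 * skips the mod for a 0 divisor, leaving dst as-is,
			 * with the trailing MOV32 only needed in the 32-bit
			 * case to zero-extend dst (hence cnt drops by 2 for
			 * the 64-bit mod).
			 */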
11403 
11404 			patchlet = isdiv ? chk_and_div : chk_and_mod;
11405 			cnt = isdiv ? ARRAY_SIZE(chk_and_div) :
11406 				      ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
11407 
11408 			new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11409 			if (!new_prog)
11410 				return -ENOMEM;
11411 
11412 			delta    += cnt - 1;
11413 			env->prog = prog = new_prog;
11414 			insn      = new_prog->insnsi + i + delta;
11415 			continue;
11416 		}
11417 
11418 		if (BPF_CLASS(insn->code) == BPF_LD &&
11419 		    (BPF_MODE(insn->code) == BPF_ABS ||
11420 		     BPF_MODE(insn->code) == BPF_IND)) {
11421 			cnt = env->ops->gen_ld_abs(insn, insn_buf);
11422 			if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11423 				verbose(env, "bpf verifier is misconfigured\n");
11424 				return -EINVAL;
11425 			}
11426 
11427 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11428 			if (!new_prog)
11429 				return -ENOMEM;
11430 
11431 			delta    += cnt - 1;
11432 			env->prog = prog = new_prog;
11433 			insn      = new_prog->insnsi + i + delta;
11434 			continue;
11435 		}
11436 
11437 		if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) ||
11438 		    insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11439 			const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11440 			const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11441 			struct bpf_insn insn_buf[16];
11442 			struct bpf_insn *patch = &insn_buf[0];
11443 			bool issrc, isneg, isimm;
11444 			u32 off_reg;
11445 
11446 			aux = &env->insn_aux_data[i + delta];
11447 			if (!aux->alu_state ||
11448 			    aux->alu_state == BPF_ALU_NON_POINTER)
11449 				continue;
11450 
11451 			isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11452 			issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
11453 				BPF_ALU_SANITIZE_SRC;
11454 			isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11455 
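			/* The masking sequence below is, roughly,
			 *     AX = alu_limit; AX -= off; AX |= off;
			 *     AX = -AX; AX s>>= 63; AX &= off;
			 * which leaves AX == off when 0 <= off <= alu_limit and
			 * AX == 0 otherwise, so the patched ALU insn (switched
			 * to use AX as source) cannot speculatively move the
			 * pointer out of the verified range.
			 */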
11456 			off_reg = issrc ? insn->src_reg : insn->dst_reg;
11457 			if (isimm) {
11458 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11459 			} else {
11460 				if (isneg)
11461 					*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11462 				*patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11463 				*patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11464 				*patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11465 				*patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11466 				*patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
11467 				*patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11468 			}
11469 			if (!issrc)
11470 				*patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11471 			insn->src_reg = BPF_REG_AX;
11472 			if (isneg)
11473 				insn->code = insn->code == code_add ?
11474 					     code_sub : code_add;
11475 			*patch++ = *insn;
11476 			if (issrc && isneg && !isimm)
11477 				*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11478 			cnt = patch - insn_buf;
11479 
11480 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11481 			if (!new_prog)
11482 				return -ENOMEM;
11483 
11484 			delta    += cnt - 1;
11485 			env->prog = prog = new_prog;
11486 			insn      = new_prog->insnsi + i + delta;
11487 			continue;
11488 		}
11489 
11490 		if (insn->code != (BPF_JMP | BPF_CALL))
11491 			continue;
11492 		if (insn->src_reg == BPF_PSEUDO_CALL)
11493 			continue;
11494 
11495 		if (insn->imm == BPF_FUNC_get_route_realm)
11496 			prog->dst_needed = 1;
11497 		if (insn->imm == BPF_FUNC_get_prandom_u32)
11498 			bpf_user_rnd_init_once();
11499 		if (insn->imm == BPF_FUNC_override_return)
11500 			prog->kprobe_override = 1;
11501 		if (insn->imm == BPF_FUNC_tail_call) {
11502 			/* If we tail call into other programs, we
11503 			 * cannot make any assumptions since they can
11504 			 * be replaced dynamically during runtime in
11505 			 * the program array.
11506 			 */
11507 			prog->cb_access = 1;
11508 			if (!allow_tail_call_in_subprogs(env))
11509 				prog->aux->stack_depth = MAX_BPF_STACK;
11510 			prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11511 
11512 			/* mark bpf_tail_call as different opcode to avoid
11513 			 * conditional branch in the interpreter for every normal
11514 			 * call and to prevent accidental JITing by JIT compiler
11515 			 * that doesn't support bpf_tail_call yet
11516 			 */
11517 			insn->imm = 0;
11518 			insn->code = BPF_JMP | BPF_TAIL_CALL;
11519 
11520 			aux = &env->insn_aux_data[i + delta];
11521 			if (env->bpf_capable && !expect_blinding &&
11522 			    prog->jit_requested &&
11523 			    !bpf_map_key_poisoned(aux) &&
11524 			    !bpf_map_ptr_poisoned(aux) &&
11525 			    !bpf_map_ptr_unpriv(aux)) {
11526 				struct bpf_jit_poke_descriptor desc = {
11527 					.reason = BPF_POKE_REASON_TAIL_CALL,
11528 					.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11529 					.tail_call.key = bpf_map_key_immediate(aux),
11530 					.insn_idx = i + delta,
11531 				};
11532 
11533 				ret = bpf_jit_add_poke_descriptor(prog, &desc);
11534 				if (ret < 0) {
11535 					verbose(env, "adding tail call poke descriptor failed\n");
11536 					return ret;
11537 				}
11538 
11539 				insn->imm = ret + 1;
11540 				continue;
11541 			}
11542 
11543 			if (!bpf_map_ptr_unpriv(aux))
11544 				continue;
11545 
11546 			/* instead of changing every JIT dealing with tail_call
11547 			 * emit two extra insns:
11548 			 * if (index >= max_entries) goto out;
11549 			 * index &= array->index_mask;
11550 			 * to avoid out-of-bounds cpu speculation
11551 			 */
11552 			if (bpf_map_ptr_poisoned(aux)) {
11553 				verbose(env, "tail_call abusing map_ptr\n");
11554 				return -EINVAL;
11555 			}
11556 
11557 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11558 			insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3,
11559 						  map_ptr->max_entries, 2);
11560 			insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3,
11561 						    container_of(map_ptr,
11562 								 struct bpf_array,
11563 								 map)->index_mask);
11564 			insn_buf[2] = *insn;
11565 			cnt = 3;
11566 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11567 			if (!new_prog)
11568 				return -ENOMEM;
11569 
11570 			delta    += cnt - 1;
11571 			env->prog = prog = new_prog;
11572 			insn      = new_prog->insnsi + i + delta;
11573 			continue;
11574 		}
11575 
11576 		/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11577 		 * and other inlining handlers are currently limited to 64 bit
11578 		 * only.
11579 		 */
11580 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
11581 		    (insn->imm == BPF_FUNC_map_lookup_elem ||
11582 		     insn->imm == BPF_FUNC_map_update_elem ||
11583 		     insn->imm == BPF_FUNC_map_delete_elem ||
11584 		     insn->imm == BPF_FUNC_map_push_elem   ||
11585 		     insn->imm == BPF_FUNC_map_pop_elem    ||
11586 		     insn->imm == BPF_FUNC_map_peek_elem)) {
11587 			aux = &env->insn_aux_data[i + delta];
11588 			if (bpf_map_ptr_poisoned(aux))
11589 				goto patch_call_imm;
11590 
11591 			map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11592 			ops = map_ptr->ops;
11593 			if (insn->imm == BPF_FUNC_map_lookup_elem &&
11594 			    ops->map_gen_lookup) {
11595 				cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11596 				if (cnt == -EOPNOTSUPP)
11597 					goto patch_map_ops_generic;
11598 				if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11599 					verbose(env, "bpf verifier is misconfigured\n");
11600 					return -EINVAL;
11601 				}
11602 
11603 				new_prog = bpf_patch_insn_data(env, i + delta,
11604 							       insn_buf, cnt);
11605 				if (!new_prog)
11606 					return -ENOMEM;
11607 
11608 				delta    += cnt - 1;
11609 				env->prog = prog = new_prog;
11610 				insn      = new_prog->insnsi + i + delta;
11611 				continue;
11612 			}
11613 
11614 			BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
11615 				     (void *(*)(struct bpf_map *map, void *key))NULL));
11616 			BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
11617 				     (int (*)(struct bpf_map *map, void *key))NULL));
11618 			BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11619 				     (int (*)(struct bpf_map *map, void *key, void *value,
11620 					      u64 flags))NULL));
11621 			BUILD_BUG_ON(!__same_type(ops->map_push_elem,
11622 				     (int (*)(struct bpf_map *map, void *value,
11623 					      u64 flags))NULL));
11624 			BUILD_BUG_ON(!__same_type(ops->map_pop_elem,
11625 				     (int (*)(struct bpf_map *map, void *value))NULL));
11626 			BUILD_BUG_ON(!__same_type(ops->map_peek_elem,
11627 				     (int (*)(struct bpf_map *map, void *value))NULL));
11628 patch_map_ops_generic:
11629 			switch (insn->imm) {
11630 			case BPF_FUNC_map_lookup_elem:
11631 				insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
11632 					    __bpf_call_base;
11633 				continue;
11634 			case BPF_FUNC_map_update_elem:
11635 				insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
11636 					    __bpf_call_base;
11637 				continue;
11638 			case BPF_FUNC_map_delete_elem:
11639 				insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
11640 					    __bpf_call_base;
11641 				continue;
11642 			case BPF_FUNC_map_push_elem:
11643 				insn->imm = BPF_CAST_CALL(ops->map_push_elem) -
11644 					    __bpf_call_base;
11645 				continue;
11646 			case BPF_FUNC_map_pop_elem:
11647 				insn->imm = BPF_CAST_CALL(ops->map_pop_elem) -
11648 					    __bpf_call_base;
11649 				continue;
11650 			case BPF_FUNC_map_peek_elem:
11651 				insn->imm = BPF_CAST_CALL(ops->map_peek_elem) -
11652 					    __bpf_call_base;
11653 				continue;
11654 			}
11655 
11656 			goto patch_call_imm;
11657 		}
11658 
11659 		if (prog->jit_requested && BITS_PER_LONG == 64 &&
11660 		    insn->imm == BPF_FUNC_jiffies64) {
11661 			struct bpf_insn ld_jiffies_addr[2] = {
11662 				BPF_LD_IMM64(BPF_REG_0,
11663 					     (unsigned long)&jiffies),
11664 			};
11665 
11666 			insn_buf[0] = ld_jiffies_addr[0];
11667 			insn_buf[1] = ld_jiffies_addr[1];
11668 			insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0,
11669 						  BPF_REG_0, 0);
11670 			cnt = 3;
11671 
11672 			new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
11673 						       cnt);
11674 			if (!new_prog)
11675 				return -ENOMEM;
11676 
11677 			delta    += cnt - 1;
11678 			env->prog = prog = new_prog;
11679 			insn      = new_prog->insnsi + i + delta;
11680 			continue;
11681 		}
11682 
11683 patch_call_imm:
11684 		fn = env->ops->get_func_proto(insn->imm, env->prog);
11685 		/* all functions that have a prototype and that the verifier
11686 		 * allowed programs to call must be real in-kernel functions
11687 		 */
11688 		if (!fn->func) {
11689 			verbose(env,
11690 				"kernel subsystem misconfigured func %s#%d\n",
11691 				func_id_name(insn->imm), insn->imm);
11692 			return -EFAULT;
11693 		}
11694 		insn->imm = fn->func - __bpf_call_base;
11695 	}
11696 
11697 	/* Since poke tab is now finalized, publish aux to tracker. */
11698 	for (i = 0; i < prog->aux->size_poke_tab; i++) {
11699 		map_ptr = prog->aux->poke_tab[i].tail_call.map;
11700 		if (!map_ptr->ops->map_poke_track ||
11701 		    !map_ptr->ops->map_poke_untrack ||
11702 		    !map_ptr->ops->map_poke_run) {
11703 			verbose(env, "bpf verifier is misconfigured\n");
11704 			return -EINVAL;
11705 		}
11706 
11707 		ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11708 		if (ret < 0) {
11709 			verbose(env, "tracking tail call prog failed\n");
11710 			return ret;
11711 		}
11712 	}
11713 
11714 	return 0;
11715 }
11716 
11717 static void free_states(struct bpf_verifier_env *env)
11718 {
11719 	struct bpf_verifier_state_list *sl, *sln;
11720 	int i;
11721 
11722 	sl = env->free_list;
11723 	while (sl) {
11724 		sln = sl->next;
11725 		free_verifier_state(&sl->state, false);
11726 		kfree(sl);
11727 		sl = sln;
11728 	}
11729 	env->free_list = NULL;
11730 
11731 	if (!env->explored_states)
11732 		return;
11733 
11734 	for (i = 0; i < state_htab_size(env); i++) {
11735 		sl = env->explored_states[i];
11736 
11737 		while (sl) {
11738 			sln = sl->next;
11739 			free_verifier_state(&sl->state, false);
11740 			kfree(sl);
11741 			sl = sln;
11742 		}
11743 		env->explored_states[i] = NULL;
11744 	}
11745 }
11746 
11747 static int do_check_common(struct bpf_verifier_env *env, int subprog)
11748 {
11749 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11750 	struct bpf_verifier_state *state;
11751 	struct bpf_reg_state *regs;
11752 	int ret, i;
11753 
11754 	env->prev_linfo = NULL;
11755 	env->pass_cnt++;
11756 
11757 	state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11758 	if (!state)
11759 		return -ENOMEM;
11760 	state->curframe = 0;
11761 	state->speculative = false;
11762 	state->branches = 1;
11763 	state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11764 	if (!state->frame[0]) {
11765 		kfree(state);
11766 		return -ENOMEM;
11767 	}
11768 	env->cur_state = state;
11769 	init_func_state(env, state->frame[0],
11770 			BPF_MAIN_FUNC /* callsite */,
11771 			0 /* frameno */,
11772 			subprog);
11773 
11774 	regs = state->frame[state->curframe]->regs;
11775 	if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11776 		ret = btf_prepare_func_args(env, subprog, regs);
11777 		if (ret)
11778 			goto out;
11779 		for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11780 			if (regs[i].type == PTR_TO_CTX)
11781 				mark_reg_known_zero(env, regs, i);
11782 			else if (regs[i].type == SCALAR_VALUE)
11783 				mark_reg_unknown(env, regs, i);
11784 		}
11785 	} else {
11786 		/* 1st arg to a function */
11787 		regs[BPF_REG_1].type = PTR_TO_CTX;
11788 		mark_reg_known_zero(env, regs, BPF_REG_1);
11789 		ret = btf_check_func_arg_match(env, subprog, regs);
11790 		if (ret == -EFAULT)
11791 			/* unlikely verifier bug. abort.
11792 			 * ret == 0 and ret < 0 are sadly acceptable for
11793 			 * main() function due to backward compatibility.
11794 			 * For example, a socket filter program may be written as:
11795 			 * int bpf_prog(struct pt_regs *ctx)
11796 			 * and never dereference that ctx in the program.
11797 			 * 'struct pt_regs' is a type mismatch for socket
11798 			 * filter that should be using 'struct __sk_buff'.
11799 			 */
11800 			goto out;
11801 	}
11802 
11803 	ret = do_check(env);
11804 out:
11805 	/* check for NULL is necessary, since cur_state can be freed inside
11806 	 * do_check() under memory pressure.
11807 	 */
11808 	if (env->cur_state) {
11809 		free_verifier_state(env->cur_state, true);
11810 		env->cur_state = NULL;
11811 	}
11812 	while (!pop_stack(env, NULL, NULL, false));
11813 	if (!ret && pop_log)
11814 		bpf_vlog_reset(&env->log, 0);
11815 	free_states(env);
11816 	return ret;
11817 }
11818 
11819 /* Verify all global functions in a BPF program one by one based on their BTF.
11820  * All global functions must pass verification. Otherwise the whole program is rejected.
11821  * Consider:
11822  * int bar(int);
11823  * int foo(int f)
11824  * {
11825  *    return bar(f);
11826  * }
11827  * int bar(int b)
11828  * {
11829  *    ...
11830  * }
11831  * foo() will be verified first for R1=any_scalar_value. During verification it
11832  * will be assumed that bar() already verified successfully and call to bar()
11833  * from foo() will be checked for type match only. Later bar() will be verified
11834  * independently to check that it's safe for R1=any_scalar_value.
11835  */
11836 static int do_check_subprogs(struct bpf_verifier_env *env)
11837 {
11838 	struct bpf_prog_aux *aux = env->prog->aux;
11839 	int i, ret;
11840 
11841 	if (!aux->func_info)
11842 		return 0;
11843 
11844 	for (i = 1; i < env->subprog_cnt; i++) {
11845 		if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL)
11846 			continue;
11847 		env->insn_idx = env->subprog_info[i].start;
11848 		WARN_ON_ONCE(env->insn_idx == 0);
11849 		ret = do_check_common(env, i);
11850 		if (ret) {
11851 			return ret;
11852 		} else if (env->log.level & BPF_LOG_LEVEL) {
11853 			verbose(env,
11854 				"Func#%d is safe for any args that match its prototype\n",
11855 				i);
11856 		}
11857 	}
11858 	return 0;
11859 }
11860 
11861 static int do_check_main(struct bpf_verifier_env *env)
11862 {
11863 	int ret;
11864 
11865 	env->insn_idx = 0;
11866 	ret = do_check_common(env, 0);
11867 	if (!ret)
11868 		env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11869 	return ret;
11870 }
11871 
11872 
11873 static void print_verification_stats(struct bpf_verifier_env *env)
11874 {
11875 	int i;
11876 
11877 	if (env->log.level & BPF_LOG_STATS) {
11878 		verbose(env, "verification time %lld usec\n",
11879 			div_u64(env->verification_time, 1000));
11880 		verbose(env, "stack depth ");
11881 		for (i = 0; i < env->subprog_cnt; i++) {
11882 			u32 depth = env->subprog_info[i].stack_depth;
11883 
11884 			verbose(env, "%d", depth);
11885 			if (i + 1 < env->subprog_cnt)
11886 				verbose(env, "+");
11887 		}
11888 		verbose(env, "\n");
11889 	}
11890 	verbose(env, "processed %d insns (limit %d) max_states_per_insn %d "
11891 		"total_states %d peak_states %d mark_read %d\n",
11892 		env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS,
11893 		env->max_states_per_insn, env->total_states,
11894 		env->peak_states, env->longest_mark_read_walk);
11895 }
11896 
11897 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11898 {
11899 	const struct btf_type *t, *func_proto;
11900 	const struct bpf_struct_ops *st_ops;
11901 	const struct btf_member *member;
11902 	struct bpf_prog *prog = env->prog;
11903 	u32 btf_id, member_idx;
11904 	const char *mname;
11905 
11906 	if (!prog->gpl_compatible) {
11907 		verbose(env, "struct ops programs must have a GPL compatible license\n");
11908 		return -EINVAL;
11909 	}
11910 
11911 	btf_id = prog->aux->attach_btf_id;
11912 	st_ops = bpf_struct_ops_find(btf_id);
11913 	if (!st_ops) {
11914 		verbose(env, "attach_btf_id %u is not a supported struct\n",
11915 			btf_id);
11916 		return -ENOTSUPP;
11917 	}
11918 
11919 	t = st_ops->type;
11920 	member_idx = prog->expected_attach_type;
11921 	if (member_idx >= btf_type_vlen(t)) {
11922 		verbose(env, "attach to invalid member idx %u of struct %s\n",
11923 			member_idx, st_ops->name);
11924 		return -EINVAL;
11925 	}
11926 
11927 	member = &btf_type_member(t)[member_idx];
11928 	mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11929 	func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type,
11930 					       NULL);
11931 	if (!func_proto) {
11932 		verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n",
11933 			mname, member_idx, st_ops->name);
11934 		return -EINVAL;
11935 	}
11936 
11937 	if (st_ops->check_member) {
11938 		int err = st_ops->check_member(t, member);
11939 
11940 		if (err) {
11941 			verbose(env, "attach to unsupported member %s of struct %s\n",
11942 				mname, st_ops->name);
11943 			return err;
11944 		}
11945 	}
11946 
11947 	prog->aux->attach_func_proto = func_proto;
11948 	prog->aux->attach_func_name = mname;
11949 	env->ops = st_ops->verifier_ops;
11950 
11951 	return 0;
11952 }
11953 #define SECURITY_PREFIX "security_"
11954 
11955 static int check_attach_modify_return(unsigned long addr, const char *func_name)
11956 {
11957 	if (within_error_injection_list(addr) ||
11958 	    !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
11959 		return 0;
11960 
11961 	return -EINVAL;
11962 }
11963 
11964 /* non exhaustive list of sleepable bpf_lsm_*() functions */
11965 BTF_SET_START(btf_sleepable_lsm_hooks)
11966 #ifdef CONFIG_BPF_LSM
11967 BTF_ID(func, bpf_lsm_bprm_committed_creds)
11968 #else
11969 BTF_ID_UNUSED
11970 #endif
11971 BTF_SET_END(btf_sleepable_lsm_hooks)
11972 
11973 static int check_sleepable_lsm_hook(u32 btf_id)
11974 {
11975 	return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11976 }
11977 
11978 /* list of non-sleepable functions that are otherwise on
11979  * ALLOW_ERROR_INJECTION list
11980  */
11981 BTF_SET_START(btf_non_sleepable_error_inject)
11982 /* Three functions below can be called from sleepable and non-sleepable context.
11983  * Assume non-sleepable from bpf safety point of view.
11984  */
11985 BTF_ID(func, __add_to_page_cache_locked)
11986 BTF_ID(func, should_fail_alloc_page)
11987 BTF_ID(func, should_failslab)
11988 BTF_SET_END(btf_non_sleepable_error_inject)
11989 
11990 static int check_non_sleepable_error_inject(u32 btf_id)
11991 {
11992 	return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11993 }
11994 
11995 int bpf_check_attach_target(struct bpf_verifier_log *log,
11996 			    const struct bpf_prog *prog,
11997 			    const struct bpf_prog *tgt_prog,
11998 			    u32 btf_id,
11999 			    struct bpf_attach_target_info *tgt_info)
12000 {
12001 	bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
12002 	const char prefix[] = "btf_trace_";
12003 	int ret = 0, subprog = -1, i;
12004 	const struct btf_type *t;
12005 	bool conservative = true;
12006 	const char *tname;
12007 	struct btf *btf;
12008 	long addr = 0;
12009 
12010 	if (!btf_id) {
12011 		bpf_log(log, "Tracing programs must provide btf_id\n");
12012 		return -EINVAL;
12013 	}
12014 	btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
12015 	if (!btf) {
12016 		bpf_log(log,
12017 			"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
12018 		return -EINVAL;
12019 	}
12020 	t = btf_type_by_id(btf, btf_id);
12021 	if (!t) {
12022 		bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
12023 		return -EINVAL;
12024 	}
12025 	tname = btf_name_by_offset(btf, t->name_off);
12026 	if (!tname) {
12027 		bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
12028 		return -EINVAL;
12029 	}
12030 	if (tgt_prog) {
12031 		struct bpf_prog_aux *aux = tgt_prog->aux;
12032 
12033 		for (i = 0; i < aux->func_info_cnt; i++)
12034 			if (aux->func_info[i].type_id == btf_id) {
12035 				subprog = i;
12036 				break;
12037 			}
12038 		if (subprog == -1) {
12039 			bpf_log(log, "Subprog %s doesn't exist\n", tname);
12040 			return -EINVAL;
12041 		}
12042 		conservative = aux->func_info_aux[subprog].unreliable;
12043 		if (prog_extension) {
12044 			if (conservative) {
12045 				bpf_log(log,
12046 					"Cannot replace static functions\n");
12047 				return -EINVAL;
12048 			}
12049 			if (!prog->jit_requested) {
12050 				bpf_log(log,
12051 					"Extension programs should be JITed\n");
12052 				return -EINVAL;
12053 			}
12054 		}
12055 		if (!tgt_prog->jited) {
12056 			bpf_log(log, "Can attach to only JITed progs\n");
12057 			return -EINVAL;
12058 		}
12059 		if (tgt_prog->type == prog->type) {
12060 			/* Cannot fentry/fexit another fentry/fexit program.
12061 			 * Cannot attach program extension to another extension.
12062 			 * It's ok to attach fentry/fexit to extension program.
12063 			 */
12064 			bpf_log(log, "Cannot recursively attach\n");
12065 			return -EINVAL;
12066 		}
12067 		if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
12068 		    prog_extension &&
12069 		    (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY ||
12070 		     tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
12071 			/* Program extensions can extend all program types
12072 			 * except fentry/fexit. The reason is the following.
12073 			 * The fentry/fexit programs are used for performance
12074 			 * analysis, stats and can be attached to any program
12075 			 * type except themselves. When an extension program is
12076 			 * replacing an XDP function, it is necessary to allow
12077 			 * performance analysis of all functions: both the original
12078 			 * XDP program and its program extension. Hence
12079 			 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
12080 			 * allowed. If extending of fentry/fexit was allowed it
12081 			 * would be possible to create long call chain
12082 			 * fentry->extension->fentry->extension beyond
12083 			 * reasonable stack size. Hence extending fentry is not
12084 			 * allowed.
12085 			 */
12086 			bpf_log(log, "Cannot extend fentry/fexit\n");
12087 			return -EINVAL;
12088 		}
12089 	} else {
12090 		if (prog_extension) {
12091 			bpf_log(log, "Cannot replace kernel functions\n");
12092 			return -EINVAL;
12093 		}
12094 	}
12095 
12096 	switch (prog->expected_attach_type) {
12097 	case BPF_TRACE_RAW_TP:
12098 		if (tgt_prog) {
12099 			bpf_log(log,
12100 				"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
12101 			return -EINVAL;
12102 		}
12103 		if (!btf_type_is_typedef(t)) {
12104 			bpf_log(log, "attach_btf_id %u is not a typedef\n",
12105 				btf_id);
12106 			return -EINVAL;
12107 		}
12108 		if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
12109 			bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
12110 				btf_id, tname);
12111 			return -EINVAL;
12112 		}
12113 		tname += sizeof(prefix) - 1;
12114 		t = btf_type_by_id(btf, t->type);
12115 		if (!btf_type_is_ptr(t))
12116 			/* should never happen in valid vmlinux build */
12117 			return -EINVAL;
12118 		t = btf_type_by_id(btf, t->type);
12119 		if (!btf_type_is_func_proto(t))
12120 			/* should never happen in valid vmlinux build */
12121 			return -EINVAL;
12122 
12123 		break;
12124 	case BPF_TRACE_ITER:
12125 		if (!btf_type_is_func(t)) {
12126 			bpf_log(log, "attach_btf_id %u is not a function\n",
12127 				btf_id);
12128 			return -EINVAL;
12129 		}
12130 		t = btf_type_by_id(btf, t->type);
12131 		if (!btf_type_is_func_proto(t))
12132 			return -EINVAL;
12133 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12134 		if (ret)
12135 			return ret;
12136 		break;
12137 	default:
12138 		if (!prog_extension)
12139 			return -EINVAL;
12140 		fallthrough;
12141 	case BPF_MODIFY_RETURN:
12142 	case BPF_LSM_MAC:
12143 	case BPF_TRACE_FENTRY:
12144 	case BPF_TRACE_FEXIT:
12145 		if (!btf_type_is_func(t)) {
12146 			bpf_log(log, "attach_btf_id %u is not a function\n",
12147 				btf_id);
12148 			return -EINVAL;
12149 		}
12150 		if (prog_extension &&
12151 		    btf_check_type_match(log, prog, btf, t))
12152 			return -EINVAL;
12153 		t = btf_type_by_id(btf, t->type);
12154 		if (!btf_type_is_func_proto(t))
12155 			return -EINVAL;
12156 
12157 		if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
12158 		    (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
12159 		     prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
12160 			return -EINVAL;
12161 
12162 		if (tgt_prog && conservative)
12163 			t = NULL;
12164 
12165 		ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
12166 		if (ret < 0)
12167 			return ret;
12168 
12169 		if (tgt_prog) {
12170 			if (subprog == 0)
12171 				addr = (long) tgt_prog->bpf_func;
12172 			else
12173 				addr = (long) tgt_prog->aux->func[subprog]->bpf_func;
12174 		} else {
12175 			addr = kallsyms_lookup_name(tname);
12176 			if (!addr) {
12177 				bpf_log(log,
12178 					"The address of function %s cannot be found\n",
12179 					tname);
12180 				return -ENOENT;
12181 			}
12182 		}
12183 
12184 		if (prog->aux->sleepable) {
12185 			ret = -EINVAL;
12186 			switch (prog->type) {
12187 			case BPF_PROG_TYPE_TRACING:
12188 				/* fentry/fexit/fmod_ret progs can be sleepable only if they are
12189 				 * attached to ALLOW_ERROR_INJECTION and are not in denylist.
12190 				 */
12191 				if (!check_non_sleepable_error_inject(btf_id) &&
12192 				    within_error_injection_list(addr))
12193 					ret = 0;
12194 				break;
12195 			case BPF_PROG_TYPE_LSM:
12196 				/* LSM progs check that they are attached to bpf_lsm_*() funcs.
12197 				 * Only some of them are sleepable.
12198 				 */
12199 				if (check_sleepable_lsm_hook(btf_id))
12200 					ret = 0;
12201 				break;
12202 			default:
12203 				break;
12204 			}
12205 			if (ret) {
12206 				bpf_log(log, "%s is not sleepable\n", tname);
12207 				return ret;
12208 			}
12209 		} else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12210 			if (tgt_prog) {
12211 				bpf_log(log, "can't modify return codes of BPF programs\n");
12212 				return -EINVAL;
12213 			}
12214 			ret = check_attach_modify_return(addr, tname);
12215 			if (ret) {
12216 				bpf_log(log, "%s() is not modifiable\n", tname);
12217 				return ret;
12218 			}
12219 		}
12220 
12221 		break;
12222 	}
12223 	tgt_info->tgt_addr = addr;
12224 	tgt_info->tgt_name = tname;
12225 	tgt_info->tgt_type = t;
12226 	return 0;
12227 }
12228 
12229 static int check_attach_btf_id(struct bpf_verifier_env *env)
12230 {
12231 	struct bpf_prog *prog = env->prog;
12232 	struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12233 	struct bpf_attach_target_info tgt_info = {};
12234 	u32 btf_id = prog->aux->attach_btf_id;
12235 	struct bpf_trampoline *tr;
12236 	int ret;
12237 	u64 key;
12238 
12239 	if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
12240 	    prog->type != BPF_PROG_TYPE_LSM) {
12241 		verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12242 		return -EINVAL;
12243 	}
12244 
12245 	if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
12246 		return check_struct_ops_btf_id(env);
12247 
12248 	if (prog->type != BPF_PROG_TYPE_TRACING &&
12249 	    prog->type != BPF_PROG_TYPE_LSM &&
12250 	    prog->type != BPF_PROG_TYPE_EXT)
12251 		return 0;
12252 
12253 	ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12254 	if (ret)
12255 		return ret;
12256 
12257 	if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12258 		/* to make freplace equivalent to their targets, they need to
12259 		 * inherit env->ops and expected_attach_type for the rest of the
12260 		 * verification
12261 		 */
12262 		env->ops = bpf_verifier_ops[tgt_prog->type];
12263 		prog->expected_attach_type = tgt_prog->expected_attach_type;
12264 	}
12265 
12266 	/* store info about the attachment target that will be used later */
12267 	prog->aux->attach_func_proto = tgt_info.tgt_type;
12268 	prog->aux->attach_func_name = tgt_info.tgt_name;
12269 
12270 	if (tgt_prog) {
12271 		prog->aux->saved_dst_prog_type = tgt_prog->type;
12272 		prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12273 	}
12274 
12275 	if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12276 		prog->aux->attach_btf_trace = true;
12277 		return 0;
12278 	} else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12279 		if (!bpf_iter_prog_supported(prog))
12280 			return -EINVAL;
12281 		return 0;
12282 	}
12283 
12284 	if (prog->type == BPF_PROG_TYPE_LSM) {
12285 		ret = bpf_lsm_verify_prog(&env->log, prog);
12286 		if (ret < 0)
12287 			return ret;
12288 	}
12289 
12290 	key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12291 	tr = bpf_trampoline_get(key, &tgt_info);
12292 	if (!tr)
12293 		return -ENOMEM;
12294 
12295 	prog->aux->dst_trampoline = tr;
12296 	return 0;
12297 }
12298 
12299 struct btf *bpf_get_btf_vmlinux(void)
12300 {
12301 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12302 		mutex_lock(&bpf_verifier_lock);
12303 		if (!btf_vmlinux)
12304 			btf_vmlinux = btf_parse_vmlinux();
12305 		mutex_unlock(&bpf_verifier_lock);
12306 	}
12307 	return btf_vmlinux;
12308 }
12309 
12310 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
12311 	      union bpf_attr __user *uattr)
12312 {
12313 	u64 start_time = ktime_get_ns();
12314 	struct bpf_verifier_env *env;
12315 	struct bpf_verifier_log *log;
12316 	int i, len, ret = -EINVAL;
12317 	bool is_priv;
12318 
12319 	/* no program is valid */
12320 	if (ARRAY_SIZE(bpf_verifier_ops) == 0)
12321 		return -EINVAL;
12322 
12323 	/* 'struct bpf_verifier_env' can be global, but since it's not small,
12324 	 * allocate/free it every time bpf_check() is called
12325 	 */
12326 	env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12327 	if (!env)
12328 		return -ENOMEM;
12329 	log = &env->log;
12330 
12331 	len = (*prog)->len;
12332 	env->insn_aux_data =
12333 		vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12334 	ret = -ENOMEM;
12335 	if (!env->insn_aux_data)
12336 		goto err_free_env;
12337 	for (i = 0; i < len; i++)
12338 		env->insn_aux_data[i].orig_idx = i;
12339 	env->prog = *prog;
12340 	env->ops = bpf_verifier_ops[env->prog->type];
12341 	is_priv = bpf_capable();
12342 
12343 	bpf_get_btf_vmlinux();
12344 
12345 	/* grab the mutex to protect few globals used by verifier */
12346 	if (!is_priv)
12347 		mutex_lock(&bpf_verifier_lock);
12348 
12349 	if (attr->log_level || attr->log_buf || attr->log_size) {
12350 		/* user requested verbose verifier output
12351 		 * and supplied buffer to store the verification trace
12352 		 */
12353 		log->level = attr->log_level;
12354 		log->ubuf = (char __user *) (unsigned long) attr->log_buf;
12355 		log->len_total = attr->log_size;
12356 
12357 		/* log attributes have to be sane */
12358 		if (!bpf_verifier_log_attr_valid(log)) {
12359 			ret = -EINVAL;
12360 			goto err_unlock;
12361 		}
12362 	}
12363 
12364 	if (IS_ERR(btf_vmlinux)) {
12365 		/* Either gcc, pahole, or the kernel is broken. */
12366 		verbose(env, "in-kernel BTF is malformed\n");
12367 		ret = PTR_ERR(btf_vmlinux);
12368 		goto skip_full_check;
12369 	}
12370 
12371 	env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12372 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
12373 		env->strict_alignment = true;
12374 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
12375 		env->strict_alignment = false;
12376 
12377 	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12378 	env->allow_uninit_stack = bpf_allow_uninit_stack();
12379 	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12380 	env->bypass_spec_v1 = bpf_bypass_spec_v1();
12381 	env->bypass_spec_v4 = bpf_bypass_spec_v4();
12382 	env->bpf_capable = bpf_capable();
12383 
12384 	if (is_priv)
12385 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
12386 
12387 	env->explored_states = kvcalloc(state_htab_size(env),
12388 				       sizeof(struct bpf_verifier_state_list *),
12389 				       GFP_USER);
12390 	ret = -ENOMEM;
12391 	if (!env->explored_states)
12392 		goto skip_full_check;
12393 
12394 	ret = check_subprogs(env);
12395 	if (ret < 0)
12396 		goto skip_full_check;
12397 
12398 	ret = check_btf_info(env, attr, uattr);
12399 	if (ret < 0)
12400 		goto skip_full_check;
12401 
12402 	ret = check_attach_btf_id(env);
12403 	if (ret)
12404 		goto skip_full_check;
12405 
12406 	ret = resolve_pseudo_ldimm64(env);
12407 	if (ret < 0)
12408 		goto skip_full_check;
12409 
12410 	if (bpf_prog_is_dev_bound(env->prog->aux)) {
12411 		ret = bpf_prog_offload_verifier_prep(env->prog);
12412 		if (ret)
12413 			goto skip_full_check;
12414 	}
12415 
12416 	ret = check_cfg(env);
12417 	if (ret < 0)
12418 		goto skip_full_check;
12419 
12420 	ret = do_check_subprogs(env);
12421 	ret = ret ?: do_check_main(env);
12422 
12423 	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
12424 		ret = bpf_prog_offload_finalize(env);
12425 
12426 skip_full_check:
12427 	kvfree(env->explored_states);
12428 
12429 	if (ret == 0)
12430 		ret = check_max_stack_depth(env);
12431 
12432 	/* instruction rewrites happen after this point */
12433 	if (is_priv) {
12434 		if (ret == 0)
12435 			opt_hard_wire_dead_code_branches(env);
12436 		if (ret == 0)
12437 			ret = opt_remove_dead_code(env);
12438 		if (ret == 0)
12439 			ret = opt_remove_nops(env);
12440 	} else {
12441 		if (ret == 0)
12442 			sanitize_dead_code(env);
12443 	}
12444 
12445 	if (ret == 0)
12446 		/* program is valid, convert *(u32*)(ctx + off) accesses */
12447 		ret = convert_ctx_accesses(env);
12448 
12449 	if (ret == 0)
12450 		ret = fixup_bpf_calls(env);
12451 
12452 	/* do 32-bit optimization after insn patching is done so that the patched
12453 	 * insns can be handled correctly.
12454 	 */
12455 	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12456 		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12457 		env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret
12458 								     : false;
12459 	}
12460 
12461 	if (ret == 0)
12462 		ret = fixup_call_args(env);
12463 
12464 	env->verification_time = ktime_get_ns() - start_time;
12465 	print_verification_stats(env);
12466 
12467 	if (log->level && bpf_verifier_log_full(log))
12468 		ret = -ENOSPC;
12469 	if (log->level && !log->ubuf) {
12470 		ret = -EFAULT;
12471 		goto err_release_maps;
12472 	}
12473 
12474 	if (ret == 0 && env->used_map_cnt) {
12475 		/* if program passed verifier, update used_maps in bpf_prog_info */
12476 		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
12477 							  sizeof(env->used_maps[0]),
12478 							  GFP_KERNEL);
12479 
12480 		if (!env->prog->aux->used_maps) {
12481 			ret = -ENOMEM;
12482 			goto err_release_maps;
12483 		}
12484 
12485 		memcpy(env->prog->aux->used_maps, env->used_maps,
12486 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
12487 		env->prog->aux->used_map_cnt = env->used_map_cnt;
12488 
12489 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
12490 		 * bpf_ld_imm64 instructions
12491 		 */
12492 		convert_pseudo_ld_imm64(env);
12493 	}
12494 
12495 	if (ret == 0)
12496 		adjust_btf_func(env);
12497 
12498 err_release_maps:
12499 	if (!env->prog->aux->used_maps)
12500 		/* if we didn't copy map pointers into bpf_prog_info, release
12501 		 * them now. Otherwise free_used_maps() will release them.
12502 		 */
12503 		release_maps(env);
12504 
12505 	/* extension progs temporarily inherit the attach_type of their targets
12506 	 * for verification purposes, so set it back to zero before returning
12507 	 */
12508 	if (env->prog->type == BPF_PROG_TYPE_EXT)
12509 		env->prog->expected_attach_type = 0;
12510 
12511 	*prog = env->prog;
12512 err_unlock:
12513 	if (!is_priv)
12514 		mutex_unlock(&bpf_verifier_lock);
12515 	vfree(env->insn_aux_data);
12516 err_free_env:
12517 	kfree(env);
12518 	return ret;
12519 }
12520