TOMOYO Linux Cross Reference
Linux/kernel/bpf/verifier.c

  1 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  2  * Copyright (c) 2016 Facebook
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of version 2 of the GNU General Public
  6  * License as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope that it will be useful, but
  9  * WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11  * General Public License for more details.
 12  */
 13 #include <linux/kernel.h>
 14 #include <linux/types.h>
 15 #include <linux/slab.h>
 16 #include <linux/bpf.h>
 17 #include <linux/bpf_verifier.h>
 18 #include <linux/filter.h>
 19 #include <net/netlink.h>
 20 #include <linux/file.h>
 21 #include <linux/vmalloc.h>
 22 #include <linux/stringify.h>
 23 
 24 /* bpf_check() is a static code analyzer that walks eBPF program
 25  * instruction by instruction and updates register/stack state.
 26  * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 27  *
 28  * The first pass is depth-first-search to check that the program is a DAG.
 29  * It rejects the following programs:
 30  * - programs larger than BPF_MAXINSNS insns
 31  * - programs containing a loop (detected via back-edges)
 32  * - unreachable insns (the program must be a single function, not a forest)
 33  * - out of bounds or malformed jumps
 34  * The second pass walks all possible paths descending from the 1st insn.
 35  * Since it analyzes every path through the program, the length of the
 36  * analysis is limited to BPF_COMPLEXITY_LIMIT_INSNS (98304) insns, which may
 37  * be hit even when the total number of insns is less than 4K if there are
 38  * too many branches that change stack/regs.
 39  * The number of 'branches to be analyzed' is limited to BPF_COMPLEXITY_LIMIT_STACK (1k).
 39  *
 40  * On entry to each instruction, each register has a type, and the instruction
 41  * changes the types of the registers depending on instruction semantics.
 42  * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 43  * copied to R1.
 44  *
 45  * All registers are 64-bit.
 46  * R0    - return register
 47  * R1-R5 - argument passing registers
 48  * R6-R9 - callee saved registers
 49  * R10   - read-only frame pointer
 50  *
 51  * At the start of BPF program the register R1 contains a pointer to bpf_context
 52  * and has type PTR_TO_CTX.
 53  *
 54  * The verifier tracks arithmetic operations on pointers. For example:
 55  *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 56  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 57  * The 1st insn copies R10 (which has type FRAME_PTR) into R1,
 58  * and the 2nd arithmetic instruction is pattern matched to recognize
 59  * that it wants to construct a pointer to some element within the stack.
 60  * So after the 2nd insn, the register R1 has type PTR_TO_STACK
 61  * (and the -20 constant is saved for further stack bounds checking),
 62  * meaning that this reg is a pointer to the stack plus a known immediate constant.
 63  *
 64  * Most of the time the registers have UNKNOWN_VALUE type, which
 65  * means the register has some value, but it's not a valid pointer
 66  * (for example, pointer plus pointer becomes UNKNOWN_VALUE).
 67  *
 68  * When verifier sees load or store instructions the type of base register
 69  * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, FRAME_PTR. These are three pointer
 70  * types recognized by check_mem_access() function.
 71  *
 72  * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 73  * and the range of [ptr, ptr + map's value_size) is accessible.
 74  *
 75  * Registers used to pass values to function calls are checked against
 76  * the function's argument constraints.
 77  *
 78  * ARG_PTR_TO_MAP_KEY is one such argument constraint.
 79  * It means that the register type passed to this function must be
 80  * PTR_TO_STACK and it will be used inside the function as
 81  * 'pointer to map element key'.
 82  *
 83  * For example the argument constraints for bpf_map_lookup_elem():
 84  *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 85  *   .arg1_type = ARG_CONST_MAP_PTR,
 86  *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 87  *
 88  * ret_type says that this function returns 'pointer to map elem value or null',
 89  * the function expects the 1st argument to be a const pointer to 'struct bpf_map'
 90  * and the 2nd argument to be a pointer to the stack, which will be used inside
 91  * the helper function as a pointer to the map element key.
 92  *
 93  * On the kernel side the helper function looks like:
 94  * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 95  * {
 96  *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 97  *    void *key = (void *) (unsigned long) r2;
 98  *    void *value;
 99  *
100  *    here kernel can access 'key' and 'map' pointers safely, knowing that
101  *    [key, key + map->key_size) bytes are valid and were initialized on
102  *    the stack of eBPF program.
103  * }
104  *
105  * Corresponding eBPF program may look like:
106  *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
107  *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
108  *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
109  *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
110  * here the verifier looks at the prototype of map_lookup_elem() and sees:
111  * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
112  * so now the verifier knows that this map has a key of R1->map_ptr->key_size bytes.
113  *
114  * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
115  * so the verifier checks that [R2, R2 + map's key_size) are within stack limits
116  * and were initialized prior to this call.
117  * If that's ok, the verifier allows this BPF_CALL insn and looks at
118  * .ret_type, which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
119  * R0->type = PTR_TO_MAP_VALUE_OR_NULL, which means bpf_map_lookup_elem()
120  * returns either a pointer to the map value or NULL.
121  *
122  * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
123  * insn, the register holding that pointer in the true branch changes state to
124  * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
125  * branch. See check_cond_jmp_op().
126  *
127  * After the call R0 is set to return type of the function and registers R1-R5
128  * are set to NOT_INIT to indicate that they are no longer readable.
129  */
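
/* For illustration (a sketch, not part of the original source): the lookup
 * above, extended with the NULL check that converts PTR_TO_MAP_VALUE_OR_NULL
 * into PTR_TO_MAP_VALUE, assuming 'map_fd' refers to an already created map
 * with key_size == 4 and value_size >= 4:
 *
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),           // R2 = fp (FRAME_PTR)
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),          // R2 = fp-4 (PTR_TO_STACK)
 *    BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),             // initialize key slot [fp-4, fp)
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),               // R1 = CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),          // if R0 == NULL, skip the access
 *    BPF_MOV64_IMM(BPF_REG_1, 1),                    // fall-through: R0 is PTR_TO_MAP_VALUE
 *    BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),    // safe: within [R0, R0 + value_size)
 *    BPF_MOV64_IMM(BPF_REG_0, 0),
 *    BPF_EXIT_INSN(),
 *
 * Without the BPF_JMP_IMM null check, the BPF_STX_MEM would be rejected with
 * "R0 invalid mem access 'map_value_or_null'".
 */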
130 
131 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
132 struct bpf_verifier_stack_elem {
133         /* verifier state is 'st'
134          * before processing instruction 'insn_idx'
135          * and after processing instruction 'prev_insn_idx'
136          */
137         struct bpf_verifier_state st;
138         int insn_idx;
139         int prev_insn_idx;
140         struct bpf_verifier_stack_elem *next;
141 };
142 
143 #define BPF_COMPLEXITY_LIMIT_INSNS      98304
144 #define BPF_COMPLEXITY_LIMIT_STACK      1024
145 
146 struct bpf_call_arg_meta {
147         struct bpf_map *map_ptr;
148         bool raw_mode;
149         bool pkt_access;
150         int regno;
151         int access_size;
152 };
153 
154 /* verbose verifier prints what it's seeing
155  * bpf_check() is called under lock, so no race to access these global vars
156  */
157 static u32 log_level, log_size, log_len;
158 static char *log_buf;
159 
160 static DEFINE_MUTEX(bpf_verifier_lock);
161 
162 /* log_level controls verbosity level of eBPF verifier.
163  * verbose() is used to dump the verification trace to the log, so the user
164  * can figure out what's wrong with the program
165  */
166 static __printf(1, 2) void verbose(const char *fmt, ...)
167 {
168         va_list args;
169 
170         if (log_level == 0 || log_len >= log_size - 1)
171                 return;
172 
173         va_start(args, fmt);
174         log_len += vscnprintf(log_buf + log_len, log_size - log_len, fmt, args);
175         va_end(args);
176 }
177 
178 /* string representation of 'enum bpf_reg_type' */
179 static const char * const reg_type_str[] = {
180         [NOT_INIT]              = "?",
181         [UNKNOWN_VALUE]         = "inv",
182         [PTR_TO_CTX]            = "ctx",
183         [CONST_PTR_TO_MAP]      = "map_ptr",
184         [PTR_TO_MAP_VALUE]      = "map_value",
185         [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null",
186         [PTR_TO_MAP_VALUE_ADJ]  = "map_value_adj",
187         [FRAME_PTR]             = "fp",
188         [PTR_TO_STACK]          = "fp",
189         [CONST_IMM]             = "imm",
190         [PTR_TO_PACKET]         = "pkt",
191         [PTR_TO_PACKET_END]     = "pkt_end",
192 };
193 
194 #define __BPF_FUNC_STR_FN(x) [BPF_FUNC_ ## x] = __stringify(bpf_ ## x)
195 static const char * const func_id_str[] = {
196         __BPF_FUNC_MAPPER(__BPF_FUNC_STR_FN)
197 };
198 #undef __BPF_FUNC_STR_FN
199 
200 static const char *func_id_name(int id)
201 {
202         BUILD_BUG_ON(ARRAY_SIZE(func_id_str) != __BPF_FUNC_MAX_ID);
203 
204         if (id >= 0 && id < __BPF_FUNC_MAX_ID && func_id_str[id])
205                 return func_id_str[id];
206         else
207                 return "unknown";
208 }
209 
210 static void print_verifier_state(struct bpf_verifier_state *state)
211 {
212         struct bpf_reg_state *reg;
213         enum bpf_reg_type t;
214         int i;
215 
216         for (i = 0; i < MAX_BPF_REG; i++) {
217                 reg = &state->regs[i];
218                 t = reg->type;
219                 if (t == NOT_INIT)
220                         continue;
221                 verbose(" R%d=%s", i, reg_type_str[t]);
222                 if (t == CONST_IMM || t == PTR_TO_STACK)
223                         verbose("%lld", reg->imm);
224                 else if (t == PTR_TO_PACKET)
225                         verbose("(id=%d,off=%d,r=%d)",
226                                 reg->id, reg->off, reg->range);
227                 else if (t == UNKNOWN_VALUE && reg->imm)
228                         verbose("%lld", reg->imm);
229                 else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE ||
230                          t == PTR_TO_MAP_VALUE_OR_NULL ||
231                          t == PTR_TO_MAP_VALUE_ADJ)
232                         verbose("(ks=%d,vs=%d,id=%u)",
233                                 reg->map_ptr->key_size,
234                                 reg->map_ptr->value_size,
235                                 reg->id);
236                 if (reg->min_value != BPF_REGISTER_MIN_RANGE)
237                         verbose(",min_value=%lld",
238                                 (long long)reg->min_value);
239                 if (reg->max_value != BPF_REGISTER_MAX_RANGE)
240                         verbose(",max_value=%llu",
241                                 (unsigned long long)reg->max_value);
242         }
243         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
244                 if (state->stack_slot_type[i] == STACK_SPILL)
245                         verbose(" fp%d=%s", -MAX_BPF_STACK + i,
246                                 reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]);
247         }
248         verbose("\n");
249 }
250 
251 static const char *const bpf_class_string[] = {
252         [BPF_LD]    = "ld",
253         [BPF_LDX]   = "ldx",
254         [BPF_ST]    = "st",
255         [BPF_STX]   = "stx",
256         [BPF_ALU]   = "alu",
257         [BPF_JMP]   = "jmp",
258         [BPF_RET]   = "BUG",
259         [BPF_ALU64] = "alu64",
260 };
261 
262 static const char *const bpf_alu_string[16] = {
263         [BPF_ADD >> 4]  = "+=",
264         [BPF_SUB >> 4]  = "-=",
265         [BPF_MUL >> 4]  = "*=",
266         [BPF_DIV >> 4]  = "/=",
267         [BPF_OR  >> 4]  = "|=",
268         [BPF_AND >> 4]  = "&=",
269         [BPF_LSH >> 4]  = "<<=",
270         [BPF_RSH >> 4]  = ">>=",
271         [BPF_NEG >> 4]  = "neg",
272         [BPF_MOD >> 4]  = "%=",
273         [BPF_XOR >> 4]  = "^=",
274         [BPF_MOV >> 4]  = "=",
275         [BPF_ARSH >> 4] = "s>>=",
276         [BPF_END >> 4]  = "endian",
277 };
278 
279 static const char *const bpf_ldst_string[] = {
280         [BPF_W >> 3]  = "u32",
281         [BPF_H >> 3]  = "u16",
282         [BPF_B >> 3]  = "u8",
283         [BPF_DW >> 3] = "u64",
284 };
285 
286 static const char *const bpf_jmp_string[16] = {
287         [BPF_JA >> 4]   = "jmp",
288         [BPF_JEQ >> 4]  = "==",
289         [BPF_JGT >> 4]  = ">",
290         [BPF_JGE >> 4]  = ">=",
291         [BPF_JSET >> 4] = "&",
292         [BPF_JNE >> 4]  = "!=",
293         [BPF_JSGT >> 4] = "s>",
294         [BPF_JSGE >> 4] = "s>=",
295         [BPF_CALL >> 4] = "call",
296         [BPF_EXIT >> 4] = "exit",
297 };
298 
299 static void print_bpf_insn(const struct bpf_verifier_env *env,
300                            const struct bpf_insn *insn)
301 {
302         u8 class = BPF_CLASS(insn->code);
303 
304         if (class == BPF_ALU || class == BPF_ALU64) {
305                 if (BPF_SRC(insn->code) == BPF_X)
306                         verbose("(%02x) %sr%d %s %sr%d\n",
307                                 insn->code, class == BPF_ALU ? "(u32) " : "",
308                                 insn->dst_reg,
309                                 bpf_alu_string[BPF_OP(insn->code) >> 4],
310                                 class == BPF_ALU ? "(u32) " : "",
311                                 insn->src_reg);
312                 else
313                         verbose("(%02x) %sr%d %s %s%d\n",
314                                 insn->code, class == BPF_ALU ? "(u32) " : "",
315                                 insn->dst_reg,
316                                 bpf_alu_string[BPF_OP(insn->code) >> 4],
317                                 class == BPF_ALU ? "(u32) " : "",
318                                 insn->imm);
319         } else if (class == BPF_STX) {
320                 if (BPF_MODE(insn->code) == BPF_MEM)
321                         verbose("(%02x) *(%s *)(r%d %+d) = r%d\n",
322                                 insn->code,
323                                 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
324                                 insn->dst_reg,
325                                 insn->off, insn->src_reg);
326                 else if (BPF_MODE(insn->code) == BPF_XADD)
327                         verbose("(%02x) lock *(%s *)(r%d %+d) += r%d\n",
328                                 insn->code,
329                                 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
330                                 insn->dst_reg, insn->off,
331                                 insn->src_reg);
332                 else
333                         verbose("BUG_%02x\n", insn->code);
334         } else if (class == BPF_ST) {
335                 if (BPF_MODE(insn->code) != BPF_MEM) {
336                         verbose("BUG_st_%02x\n", insn->code);
337                         return;
338                 }
339                 verbose("(%02x) *(%s *)(r%d %+d) = %d\n",
340                         insn->code,
341                         bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
342                         insn->dst_reg,
343                         insn->off, insn->imm);
344         } else if (class == BPF_LDX) {
345                 if (BPF_MODE(insn->code) != BPF_MEM) {
346                         verbose("BUG_ldx_%02x\n", insn->code);
347                         return;
348                 }
349                 verbose("(%02x) r%d = *(%s *)(r%d %+d)\n",
350                         insn->code, insn->dst_reg,
351                         bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
352                         insn->src_reg, insn->off);
353         } else if (class == BPF_LD) {
354                 if (BPF_MODE(insn->code) == BPF_ABS) {
355                         verbose("(%02x) r0 = *(%s *)skb[%d]\n",
356                                 insn->code,
357                                 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
358                                 insn->imm);
359                 } else if (BPF_MODE(insn->code) == BPF_IND) {
360                         verbose("(%02x) r0 = *(%s *)skb[r%d + %d]\n",
361                                 insn->code,
362                                 bpf_ldst_string[BPF_SIZE(insn->code) >> 3],
363                                 insn->src_reg, insn->imm);
364                 } else if (BPF_MODE(insn->code) == BPF_IMM &&
365                            BPF_SIZE(insn->code) == BPF_DW) {
366                         /* At this point, we already made sure that the second
367                          * part of the ldimm64 insn is accessible.
368                          */
369                         u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
370                         bool map_ptr = insn->src_reg == BPF_PSEUDO_MAP_FD;
371 
372                         if (map_ptr && !env->allow_ptr_leaks)
373                                 imm = 0;
374 
375                         verbose("(%02x) r%d = 0x%llx\n", insn->code,
376                                 insn->dst_reg, (unsigned long long)imm);
377                 } else {
378                         verbose("BUG_ld_%02x\n", insn->code);
379                         return;
380                 }
381         } else if (class == BPF_JMP) {
382                 u8 opcode = BPF_OP(insn->code);
383 
384                 if (opcode == BPF_CALL) {
385                         verbose("(%02x) call %s#%d\n", insn->code,
386                                 func_id_name(insn->imm), insn->imm);
387                 } else if (insn->code == (BPF_JMP | BPF_JA)) {
388                         verbose("(%02x) goto pc%+d\n",
389                                 insn->code, insn->off);
390                 } else if (insn->code == (BPF_JMP | BPF_EXIT)) {
391                         verbose("(%02x) exit\n", insn->code);
392                 } else if (BPF_SRC(insn->code) == BPF_X) {
393                         verbose("(%02x) if r%d %s r%d goto pc%+d\n",
394                                 insn->code, insn->dst_reg,
395                                 bpf_jmp_string[BPF_OP(insn->code) >> 4],
396                                 insn->src_reg, insn->off);
397                 } else {
398                         verbose("(%02x) if r%d %s 0x%x goto pc%+d\n",
399                                 insn->code, insn->dst_reg,
400                                 bpf_jmp_string[BPF_OP(insn->code) >> 4],
401                                 insn->imm, insn->off);
402                 }
403         } else {
404                 verbose("(%02x) %s\n", insn->code, bpf_class_string[class]);
405         }
406 }
407 
408 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx)
409 {
410         struct bpf_verifier_stack_elem *elem;
411         int insn_idx;
412 
413         if (env->head == NULL)
414                 return -1;
415 
416         memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state));
417         insn_idx = env->head->insn_idx;
418         if (prev_insn_idx)
419                 *prev_insn_idx = env->head->prev_insn_idx;
420         elem = env->head->next;
421         kfree(env->head);
422         env->head = elem;
423         env->stack_size--;
424         return insn_idx;
425 }
426 
427 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
428                                              int insn_idx, int prev_insn_idx)
429 {
430         struct bpf_verifier_stack_elem *elem;
431 
432         elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
433         if (!elem)
434                 goto err;
435 
436         memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state));
437         elem->insn_idx = insn_idx;
438         elem->prev_insn_idx = prev_insn_idx;
439         elem->next = env->head;
440         env->head = elem;
441         env->stack_size++;
442         if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) {
443                 verbose("BPF program is too complex\n");
444                 goto err;
445         }
446         return &elem->st;
447 err:
448         /* pop all elements and return */
449         while (pop_stack(env, NULL) >= 0);
450         return NULL;
451 }
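
/* A sketch of how these two helpers are used (based on check_cond_jmp_op()
 * and do_check() later in this file): on a conditional jump, the state for
 * one path is pushed and verification continues down the other; a BPF_EXIT
 * pops the next pending state, until the stack is empty:
 *
 *	other_branch = push_stack(env, insn_idx + insn->off + 1, insn_idx);
 *	if (!other_branch)
 *		return -EFAULT;
 *	// ... adjust regs in both other_branch->regs[] and env->cur_state ...
 *
 * and on BPF_EXIT in the main verification loop:
 *
 *	insn_idx = pop_stack(env, &prev_insn_idx);
 *	if (insn_idx < 0)
 *		break;	// all paths verified
 */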
452 
453 #define CALLER_SAVED_REGS 6
454 static const int caller_saved[CALLER_SAVED_REGS] = {
455         BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5
456 };
457 
458 static void init_reg_state(struct bpf_reg_state *regs)
459 {
460         int i;
461 
462         for (i = 0; i < MAX_BPF_REG; i++) {
463                 regs[i].type = NOT_INIT;
464                 regs[i].imm = 0;
465                 regs[i].min_value = BPF_REGISTER_MIN_RANGE;
466                 regs[i].max_value = BPF_REGISTER_MAX_RANGE;
467         }
468 
469         /* frame pointer */
470         regs[BPF_REG_FP].type = FRAME_PTR;
471 
472         /* 1st arg to a function */
473         regs[BPF_REG_1].type = PTR_TO_CTX;
474 }
475 
476 static void __mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
477 {
478         regs[regno].type = UNKNOWN_VALUE;
479         regs[regno].id = 0;
480         regs[regno].imm = 0;
481 }
482 
483 static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno)
484 {
485         BUG_ON(regno >= MAX_BPF_REG);
486         __mark_reg_unknown_value(regs, regno);
487 }
488 
489 static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno)
490 {
491         regs[regno].min_value = BPF_REGISTER_MIN_RANGE;
492         regs[regno].max_value = BPF_REGISTER_MAX_RANGE;
493 }
494 
495 static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs,
496                                              u32 regno)
497 {
498         mark_reg_unknown_value(regs, regno);
499         reset_reg_range_values(regs, regno);
500 }
501 
502 enum reg_arg_type {
503         SRC_OP,         /* register is used as source operand */
504         DST_OP,         /* register is used as destination operand */
505         DST_OP_NO_MARK  /* same as above, check only, don't mark */
506 };
507 
508 static int check_reg_arg(struct bpf_reg_state *regs, u32 regno,
509                          enum reg_arg_type t)
510 {
511         if (regno >= MAX_BPF_REG) {
512                 verbose("R%d is invalid\n", regno);
513                 return -EINVAL;
514         }
515 
516         if (t == SRC_OP) {
517                 /* check whether register used as source operand can be read */
518                 if (regs[regno].type == NOT_INIT) {
519                         verbose("R%d !read_ok\n", regno);
520                         return -EACCES;
521                 }
522         } else {
523                 /* check whether register used as dest operand can be written to */
524                 if (regno == BPF_REG_FP) {
525                         verbose("frame pointer is read only\n");
526                         return -EACCES;
527                 }
528                 if (t == DST_OP)
529                         mark_reg_unknown_value(regs, regno);
530         }
531         return 0;
532 }
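
/* Illustration of what check_reg_arg() enforces: at the start of a program
 * only R1 (ctx) and R10 (fp) are initialized (see init_reg_state() above),
 * so a first insn of
 *
 *	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
 *
 * fails the SRC_OP check with "R2 !read_ok", and any insn that makes R10 a
 * destination fails with "frame pointer is read only".
 */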
533 
534 static int bpf_size_to_bytes(int bpf_size)
535 {
536         if (bpf_size == BPF_W)
537                 return 4;
538         else if (bpf_size == BPF_H)
539                 return 2;
540         else if (bpf_size == BPF_B)
541                 return 1;
542         else if (bpf_size == BPF_DW)
543                 return 8;
544         else
545                 return -EINVAL;
546 }
547 
548 static bool is_spillable_regtype(enum bpf_reg_type type)
549 {
550         switch (type) {
551         case PTR_TO_MAP_VALUE:
552         case PTR_TO_MAP_VALUE_OR_NULL:
553         case PTR_TO_MAP_VALUE_ADJ:
554         case PTR_TO_STACK:
555         case PTR_TO_CTX:
556         case PTR_TO_PACKET:
557         case PTR_TO_PACKET_END:
558         case FRAME_PTR:
559         case CONST_PTR_TO_MAP:
560                 return true;
561         default:
562                 return false;
563         }
564 }
565 
566 /* The check_stack_read/write functions track spill/fill of registers;
567  * stack boundary and alignment are checked in check_mem_access().
568  */
569 static int check_stack_write(struct bpf_verifier_state *state, int off,
570                              int size, int value_regno)
571 {
572         int i;
573         /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
574          * so it's aligned access and [off, off + size) are within stack limits
575          */
576 
577         if (value_regno >= 0 &&
578             is_spillable_regtype(state->regs[value_regno].type)) {
579 
580                 /* register containing pointer is being spilled into stack */
581                 if (size != BPF_REG_SIZE) {
582                         verbose("invalid size of register spill\n");
583                         return -EACCES;
584                 }
585 
586                 /* save register state */
587                 state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
588                         state->regs[value_regno];
589 
590                 for (i = 0; i < BPF_REG_SIZE; i++)
591                         state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL;
592         } else {
593                 /* regular write of data into stack */
594                 state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] =
595                         (struct bpf_reg_state) {};
596 
597                 for (i = 0; i < size; i++)
598                         state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC;
599         }
600         return 0;
601 }
602 
603 static int check_stack_read(struct bpf_verifier_state *state, int off, int size,
604                             int value_regno)
605 {
606         u8 *slot_type;
607         int i;
608 
609         slot_type = &state->stack_slot_type[MAX_BPF_STACK + off];
610 
611         if (slot_type[0] == STACK_SPILL) {
612                 if (size != BPF_REG_SIZE) {
613                         verbose("invalid size of register spill\n");
614                         return -EACCES;
615                 }
616                 for (i = 1; i < BPF_REG_SIZE; i++) {
617                         if (slot_type[i] != STACK_SPILL) {
618                                 verbose("corrupted spill memory\n");
619                                 return -EACCES;
620                         }
621                 }
622 
623                 if (value_regno >= 0)
624                         /* restore register state from stack */
625                         state->regs[value_regno] =
626                                 state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE];
627                 return 0;
628         } else {
629                 for (i = 0; i < size; i++) {
630                         if (slot_type[i] != STACK_MISC) {
631                                 verbose("invalid read from stack off %d+%d size %d\n",
632                                         off, i, size);
633                                 return -EACCES;
634                         }
635                 }
636                 if (value_regno >= 0)
637                         /* have read misc data from the stack */
638                         mark_reg_unknown_value_and_range(state->regs,
639                                                          value_regno);
640                 return 0;
641         }
642 }
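
/* Illustration of the spill/fill pattern tracked above (a sketch): pointers
 * may only be spilled/filled with full BPF_REG_SIZE (8 byte) accesses, e.g.
 *
 *	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),	// spill ctx ptr to fp-8
 *	// ... R1-R5 get clobbered by a helper call ...
 *	BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8),	// fill it back into R6
 *
 * A narrower read of a STACK_SPILL slot fails with "invalid size of register
 * spill", and (for unprivileged programs) a narrower write over a spilled
 * pointer is rejected by check_mem_access() with "attempt to corrupt spilled
 * pointer on stack".
 */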
643 
644 /* check read/write into map element returned by bpf_map_lookup_elem() */
645 static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
646                             int size)
647 {
648         struct bpf_map *map = env->cur_state.regs[regno].map_ptr;
649 
650         if (off < 0 || size <= 0 || off + size > map->value_size) {
651                 verbose("invalid access to map value, value_size=%d off=%d size=%d\n",
652                         map->value_size, off, size);
653                 return -EACCES;
654         }
655         return 0;
656 }
657 
658 /* check read/write into an adjusted map element */
659 static int check_map_access_adj(struct bpf_verifier_env *env, u32 regno,
660                                 int off, int size)
661 {
662         struct bpf_verifier_state *state = &env->cur_state;
663         struct bpf_reg_state *reg = &state->regs[regno];
664         int err;
665 
666         /* We adjusted the register to this map value, so we
667          * need to change off and size to min_value and max_value
668          * respectively to make sure our theoretical access will be
669          * safe.
670          */
671         if (log_level)
672                 print_verifier_state(state);
673         env->varlen_map_value_access = true;
674         /* The minimum value is only important with signed
675          * comparisons where we can't assume the floor of a
676          * value is 0.  If we are using signed variables for our
677          * indexes we need to make sure that whatever we use
678          * will have a set floor within our range.
679          */
680         if (reg->min_value < 0) {
681                 verbose("R%d min value is negative, either use unsigned index or add an 'if (index >= 0)' check.\n",
682                         regno);
683                 return -EACCES;
684         }
685         err = check_map_access(env, regno, reg->min_value + off, size);
686         if (err) {
687                 verbose("R%d min value is outside of the array range\n",
688                         regno);
689                 return err;
690         }
691 
692         /* If we haven't set a max value then we need to bail
693          * since we can't be sure we won't do bad things.
694          */
695         if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
696                 verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n",
697                         regno);
698                 return -EACCES;
699         }
700         return check_map_access(env, regno, reg->max_value + off, size);
701 }
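
/* A sketch of a program shape that passes check_map_access_adj(), in
 * restricted-C form (map/key names are hypothetical): the index used to
 * adjust the map value pointer needs both a floor and a ceiling before the
 * access:
 *
 *	u32 *value = bpf_map_lookup_elem(&map, &key);
 *	if (!value)
 *		return 0;
 *	if (index > MAX_ENTRIES - 1)	// unsigned cmp bounds max_value (and keeps min >= 0)
 *		return 0;
 *	value[index] = 1;		// both min and max offsets fall within value_size
 *
 * Without the bounds check, max_value stays at BPF_REGISTER_MAX_RANGE and the
 * "unbounded memory access" error above is emitted.
 */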
702 
703 #define MAX_PACKET_OFF 0xffff
704 
705 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
706                                        const struct bpf_call_arg_meta *meta,
707                                        enum bpf_access_type t)
708 {
709         switch (env->prog->type) {
710         case BPF_PROG_TYPE_LWT_IN:
711         case BPF_PROG_TYPE_LWT_OUT:
712                 /* dst_input() and dst_output() can't write for now */
713                 if (t == BPF_WRITE)
714                         return false;
715                 /* fallthrough */
716         case BPF_PROG_TYPE_SCHED_CLS:
717         case BPF_PROG_TYPE_SCHED_ACT:
718         case BPF_PROG_TYPE_XDP:
719         case BPF_PROG_TYPE_LWT_XMIT:
720                 if (meta)
721                         return meta->pkt_access;
722 
723                 env->seen_direct_write = true;
724                 return true;
725         default:
726                 return false;
727         }
728 }
729 
730 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
731                                int size)
732 {
733         struct bpf_reg_state *regs = env->cur_state.regs;
734         struct bpf_reg_state *reg = &regs[regno];
735 
736         off += reg->off;
737         if (off < 0 || size <= 0 || off + size > reg->range) {
738                 verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
739                         off, size, regno, reg->id, reg->off, reg->range);
740                 return -EACCES;
741         }
742         return 0;
743 }
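
/* A sketch of the direct packet access pattern these checks expect, in
 * restricted-C form for a TC classifier: comparing against data_end is what
 * gives the packet pointer a non-zero 'range':
 *
 *	void *data = (void *)(long)skb->data;
 *	void *data_end = (void *)(long)skb->data_end;
 *	struct ethhdr *eth = data;
 *
 *	if (data + sizeof(*eth) > data_end)
 *		return TC_ACT_OK;		// without this, reg->range stays 0
 *	if (eth->h_proto == htons(ETH_P_IP))	// ok: off + size <= reg->range
 *		...
 */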
744 
745 /* check access to 'struct bpf_context' fields */
746 static int check_ctx_access(struct bpf_verifier_env *env, int off, int size,
747                             enum bpf_access_type t, enum bpf_reg_type *reg_type)
748 {
749         /* for analyzer ctx accesses are already validated and converted */
750         if (env->analyzer_ops)
751                 return 0;
752 
753         if (env->prog->aux->ops->is_valid_access &&
754             env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) {
755                 /* remember the offset of last byte accessed in ctx */
756                 if (env->prog->aux->max_ctx_offset < off + size)
757                         env->prog->aux->max_ctx_offset = off + size;
758                 return 0;
759         }
760 
761         verbose("invalid bpf_context access off=%d size=%d\n", off, size);
762         return -EACCES;
763 }
764 
765 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
766 {
767         if (env->allow_ptr_leaks)
768                 return false;
769 
770         switch (env->cur_state.regs[regno].type) {
771         case UNKNOWN_VALUE:
772         case CONST_IMM:
773                 return false;
774         default:
775                 return true;
776         }
777 }
778 
779 static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg,
780                                    int off, int size)
781 {
782         if (reg->id && size != 1) {
783                 verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n");
784                 return -EACCES;
785         }
786 
787         /* skb->data is NET_IP_ALIGN-ed */
788         if ((NET_IP_ALIGN + reg->off + off) % size != 0) {
789                 verbose("misaligned packet access off %d+%d+%d size %d\n",
790                         NET_IP_ALIGN, reg->off, off, size);
791                 return -EACCES;
792         }
793 
794         return 0;
795 }
796 
797 static int check_val_ptr_alignment(const struct bpf_reg_state *reg,
798                                    int size)
799 {
800         if (size != 1) {
801                 verbose("Unknown alignment. Only byte-sized access allowed in value access.\n");
802                 return -EACCES;
803         }
804 
805         return 0;
806 }
807 
808 static int check_ptr_alignment(const struct bpf_reg_state *reg,
809                                int off, int size)
810 {
811         switch (reg->type) {
812         case PTR_TO_PACKET:
813                 return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
814                        check_pkt_ptr_alignment(reg, off, size);
815         case PTR_TO_MAP_VALUE_ADJ:
816                 return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 :
817                        check_val_ptr_alignment(reg, size);
818         default:
819                 if (off % size != 0) {
820                         verbose("misaligned access off %d size %d\n",
821                                 off, size);
822                         return -EACCES;
823                 }
824 
825                 return 0;
826         }
827 }
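
/* Example for the default branch above: 'off' must be a multiple of the
 * access size, so
 *
 *	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -6),
 *
 * (a 4-byte read at fp-6) fails with "misaligned access off -6 size 4".
 */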
828 
829 /* check whether memory at (regno + off) is accessible for t = (read | write)
830  * if t==write, value_regno is a register whose value is stored into memory
831  * if t==read, value_regno is a register which will receive the value from memory
832  * if t==write && value_regno==-1, some unknown value is stored into memory
833  * if t==read && value_regno==-1, don't care what we read from memory
834  */
835 static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off,
836                             int bpf_size, enum bpf_access_type t,
837                             int value_regno)
838 {
839         struct bpf_verifier_state *state = &env->cur_state;
840         struct bpf_reg_state *reg = &state->regs[regno];
841         int size, err = 0;
842 
843         if (reg->type == PTR_TO_STACK)
844                 off += reg->imm;
845 
846         size = bpf_size_to_bytes(bpf_size);
847         if (size < 0)
848                 return size;
849 
850         err = check_ptr_alignment(reg, off, size);
851         if (err)
852                 return err;
853 
854         if (reg->type == PTR_TO_MAP_VALUE ||
855             reg->type == PTR_TO_MAP_VALUE_ADJ) {
856                 if (t == BPF_WRITE && value_regno >= 0 &&
857                     is_pointer_value(env, value_regno)) {
858                         verbose("R%d leaks addr into map\n", value_regno);
859                         return -EACCES;
860                 }
861 
862                 if (reg->type == PTR_TO_MAP_VALUE_ADJ)
863                         err = check_map_access_adj(env, regno, off, size);
864                 else
865                         err = check_map_access(env, regno, off, size);
866                 if (!err && t == BPF_READ && value_regno >= 0)
867                         mark_reg_unknown_value_and_range(state->regs,
868                                                          value_regno);
869 
870         } else if (reg->type == PTR_TO_CTX) {
871                 enum bpf_reg_type reg_type = UNKNOWN_VALUE;
872 
873                 if (t == BPF_WRITE && value_regno >= 0 &&
874                     is_pointer_value(env, value_regno)) {
875                         verbose("R%d leaks addr into ctx\n", value_regno);
876                         return -EACCES;
877                 }
878                 err = check_ctx_access(env, off, size, t, &reg_type);
879                 if (!err && t == BPF_READ && value_regno >= 0) {
880                         mark_reg_unknown_value_and_range(state->regs,
881                                                          value_regno);
882                         /* note that reg.[id|off|range] == 0 */
883                         state->regs[value_regno].type = reg_type;
884                 }
885 
886         } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) {
887                 if (off >= 0 || off < -MAX_BPF_STACK) {
888                         verbose("invalid stack off=%d size=%d\n", off, size);
889                         return -EACCES;
890                 }
891                 if (t == BPF_WRITE) {
892                         if (!env->allow_ptr_leaks &&
893                             state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL &&
894                             size != BPF_REG_SIZE) {
895                                 verbose("attempt to corrupt spilled pointer on stack\n");
896                                 return -EACCES;
897                         }
898                         err = check_stack_write(state, off, size, value_regno);
899                 } else {
900                         err = check_stack_read(state, off, size, value_regno);
901                 }
902         } else if (state->regs[regno].type == PTR_TO_PACKET) {
903                 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
904                         verbose("cannot write into packet\n");
905                         return -EACCES;
906                 }
907                 if (t == BPF_WRITE && value_regno >= 0 &&
908                     is_pointer_value(env, value_regno)) {
909                         verbose("R%d leaks addr into packet\n", value_regno);
910                         return -EACCES;
911                 }
912                 err = check_packet_access(env, regno, off, size);
913                 if (!err && t == BPF_READ && value_regno >= 0)
914                         mark_reg_unknown_value_and_range(state->regs,
915                                                          value_regno);
916         } else {
917                 verbose("R%d invalid mem access '%s'\n",
918                         regno, reg_type_str[reg->type]);
919                 return -EACCES;
920         }
921 
922         if (!err && size <= 2 && value_regno >= 0 && env->allow_ptr_leaks &&
923             state->regs[value_regno].type == UNKNOWN_VALUE) {
924                 /* 1 or 2 byte load zero-extends, determine the number of
925                  * zero upper bits. Not doing it for 4 byte load, since
926                  * such values cannot be added to ptr_to_packet anyway.
927                  */
928                 state->regs[value_regno].imm = 64 - size * 8;
929         }
930         return err;
931 }
932 
933 static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn)
934 {
935         struct bpf_reg_state *regs = env->cur_state.regs;
936         int err;
937 
938         if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) ||
939             insn->imm != 0) {
940                 verbose("BPF_XADD uses reserved fields\n");
941                 return -EINVAL;
942         }
943 
944         /* check src1 operand */
945         err = check_reg_arg(regs, insn->src_reg, SRC_OP);
946         if (err)
947                 return err;
948 
949         /* check src2 operand */
950         err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
951         if (err)
952                 return err;
953 
954         if (is_pointer_value(env, insn->src_reg)) {
955                 verbose("R%d leaks addr into mem\n", insn->src_reg);
956                 return -EACCES;
957         }
958 
959         /* check whether atomic_add can read the memory */
960         err = check_mem_access(env, insn->dst_reg, insn->off,
961                                BPF_SIZE(insn->code), BPF_READ, -1);
962         if (err)
963                 return err;
964 
965         /* check whether atomic_add can write into the same memory */
966         return check_mem_access(env, insn->dst_reg, insn->off,
967                                 BPF_SIZE(insn->code), BPF_WRITE, -1);
968 }
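
/* Illustration of an insn that reaches check_xadd(): atomically add R2 to a
 * 64-bit map value through a pointer obtained from a successful lookup:
 *
 *	BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_2, 0),	// lock *(u64 *)(R0 + 0) += R2
 *
 * Only BPF_W and BPF_DW sizes are accepted, insn->imm must be 0, and the
 * target memory is checked for both read and write access.
 */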
969 
970 /* when register 'regno' is passed into a function that will read 'access_size'
971  * bytes from that pointer, make sure that it's within the stack boundary
972  * and that all elements of the stack are initialized
973  */
974 static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
975                                 int access_size, bool zero_size_allowed,
976                                 struct bpf_call_arg_meta *meta)
977 {
978         struct bpf_verifier_state *state = &env->cur_state;
979         struct bpf_reg_state *regs = state->regs;
980         int off, i;
981 
982         if (regs[regno].type != PTR_TO_STACK) {
983                 if (zero_size_allowed && access_size == 0 &&
984                     regs[regno].type == CONST_IMM &&
985                     regs[regno].imm  == 0)
986                         return 0;
987 
988                 verbose("R%d type=%s expected=%s\n", regno,
989                         reg_type_str[regs[regno].type],
990                         reg_type_str[PTR_TO_STACK]);
991                 return -EACCES;
992         }
993 
994         off = regs[regno].imm;
995         if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 ||
996             access_size <= 0) {
997                 verbose("invalid stack type R%d off=%d access_size=%d\n",
998                         regno, off, access_size);
999                 return -EACCES;
1000         }
1001 
1002         if (meta && meta->raw_mode) {
1003                 meta->access_size = access_size;
1004                 meta->regno = regno;
1005                 return 0;
1006         }
1007 
1008         for (i = 0; i < access_size; i++) {
1009                 if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) {
1010                         verbose("invalid indirect read from stack off %d+%d size %d\n",
1011                                 off, i, access_size);
1012                         return -EACCES;
1013                 }
1014         }
1015         return 0;
1016 }
1017 
1018 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
1019                                    int access_size, bool zero_size_allowed,
1020                                    struct bpf_call_arg_meta *meta)
1021 {
1022         struct bpf_reg_state *regs = env->cur_state.regs;
1023 
1024         switch (regs[regno].type) {
1025         case PTR_TO_PACKET:
1026                 return check_packet_access(env, regno, 0, access_size);
1027         case PTR_TO_MAP_VALUE:
1028                 return check_map_access(env, regno, 0, access_size);
1029         case PTR_TO_MAP_VALUE_ADJ:
1030                 return check_map_access_adj(env, regno, 0, access_size);
1031         default: /* const_imm|ptr_to_stack or invalid ptr */
1032                 return check_stack_boundary(env, regno, access_size,
1033                                             zero_size_allowed, meta);
1034         }
1035 }
1036 
1037 static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
1038                           enum bpf_arg_type arg_type,
1039                           struct bpf_call_arg_meta *meta)
1040 {
1041         struct bpf_reg_state *regs = env->cur_state.regs, *reg = &regs[regno];
1042         enum bpf_reg_type expected_type, type = reg->type;
1043         int err = 0;
1044 
1045         if (arg_type == ARG_DONTCARE)
1046                 return 0;
1047 
1048         if (type == NOT_INIT) {
1049                 verbose("R%d !read_ok\n", regno);
1050                 return -EACCES;
1051         }
1052 
1053         if (arg_type == ARG_ANYTHING) {
1054                 if (is_pointer_value(env, regno)) {
1055                         verbose("R%d leaks addr into helper function\n", regno);
1056                         return -EACCES;
1057                 }
1058                 return 0;
1059         }
1060 
1061         if (type == PTR_TO_PACKET &&
1062             !may_access_direct_pkt_data(env, meta, BPF_READ)) {
1063                 verbose("helper access to the packet is not allowed\n");
1064                 return -EACCES;
1065         }
1066 
1067         if (arg_type == ARG_PTR_TO_MAP_KEY ||
1068             arg_type == ARG_PTR_TO_MAP_VALUE) {
1069                 expected_type = PTR_TO_STACK;
1070                 if (type != PTR_TO_PACKET && type != expected_type)
1071                         goto err_type;
1072         } else if (arg_type == ARG_CONST_SIZE ||
1073                    arg_type == ARG_CONST_SIZE_OR_ZERO) {
1074                 expected_type = CONST_IMM;
1075                 /* One exception. Allow UNKNOWN_VALUE registers when the
1076                  * boundaries are known and don't cause unsafe memory accesses
1077                  */
1078                 if (type != UNKNOWN_VALUE && type != expected_type)
1079                         goto err_type;
1080         } else if (arg_type == ARG_CONST_MAP_PTR) {
1081                 expected_type = CONST_PTR_TO_MAP;
1082                 if (type != expected_type)
1083                         goto err_type;
1084         } else if (arg_type == ARG_PTR_TO_CTX) {
1085                 expected_type = PTR_TO_CTX;
1086                 if (type != expected_type)
1087                         goto err_type;
1088         } else if (arg_type == ARG_PTR_TO_MEM ||
1089                    arg_type == ARG_PTR_TO_UNINIT_MEM) {
1090                 expected_type = PTR_TO_STACK;
1091                 /* One exception here. In case the function allows NULL to be
1092                  * passed in as an argument, it's a CONST_IMM type. Final test
1093                  * happens during stack boundary checking.
1094                  */
1095                 if (type == CONST_IMM && reg->imm == 0)
1096                         /* final test in check_stack_boundary() */;
1097                 else if (type != PTR_TO_PACKET && type != PTR_TO_MAP_VALUE &&
1098                          type != PTR_TO_MAP_VALUE_ADJ && type != expected_type)
1099                         goto err_type;
1100                 meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
1101         } else {
1102                 verbose("unsupported arg_type %d\n", arg_type);
1103                 return -EFAULT;
1104         }
1105 
1106         if (arg_type == ARG_CONST_MAP_PTR) {
1107                 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
1108                 meta->map_ptr = reg->map_ptr;
1109         } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
1110                 /* bpf_map_xxx(..., map_ptr, ..., key) call:
1111                  * check that [key, key + map->key_size) are within
1112                  * stack limits and initialized
1113                  */
1114                 if (!meta->map_ptr) {
1115                         /* in the function declaration map_ptr must come before
1116                          * map_key, so that it's verified and known before
1117                          * we have to check map_key here. Otherwise it means
1118                          * that the kernel subsystem misconfigured the verifier
1119                          */
1120                         verbose("invalid map_ptr to access map->key\n");
1121                         return -EACCES;
1122                 }
1123                 if (type == PTR_TO_PACKET)
1124                         err = check_packet_access(env, regno, 0,
1125                                                   meta->map_ptr->key_size);
1126                 else
1127                         err = check_stack_boundary(env, regno,
1128                                                    meta->map_ptr->key_size,
1129                                                    false, NULL);
1130         } else if (arg_type == ARG_PTR_TO_MAP_VALUE) {
1131                 /* bpf_map_xxx(..., map_ptr, ..., value) call:
1132                  * check [value, value + map->value_size) validity
1133                  */
1134                 if (!meta->map_ptr) {
1135                         /* kernel subsystem misconfigured verifier */
1136                         verbose("invalid map_ptr to access map->value\n");
1137                         return -EACCES;
1138                 }
1139                 if (type == PTR_TO_PACKET)
1140                         err = check_packet_access(env, regno, 0,
1141                                                   meta->map_ptr->value_size);
1142                 else
1143                         err = check_stack_boundary(env, regno,
1144                                                    meta->map_ptr->value_size,
1145                                                    false, NULL);
1146         } else if (arg_type == ARG_CONST_SIZE ||
1147                    arg_type == ARG_CONST_SIZE_OR_ZERO) {
1148                 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
1149 
1150                 /* a bpf_xxx(..., buf, len) call will access 'len' bytes
1151                  * from stack pointer 'buf'. Check it.
1152                  * note: regno == len, regno - 1 == buf
1153                  */
1154                 if (regno == 0) {
1155                         /* kernel subsystem misconfigured verifier */
1156                         verbose("ARG_CONST_SIZE cannot be first argument\n");
1157                         return -EACCES;
1158                 }
1159 
1160                 /* If the register is UNKNOWN_VALUE, the access check happens
1161                  * using its boundaries. Otherwise, just use its imm
1162                  */
1163                 if (type == UNKNOWN_VALUE) {
1164                         /* For unprivileged variable accesses, disable raw
1165                          * mode so that the program is required to
1166                          * initialize all the memory that the helper could
1167                          * just partially fill up.
1168                          */
1169                         meta = NULL;
1170 
1171                         if (reg->min_value < 0) {
1172                                 verbose("R%d min value is negative, either use unsigned or 'var &= const'\n",
1173                                         regno);
1174                                 return -EACCES;
1175                         }
1176 
1177                         if (reg->min_value == 0) {
1178                                 err = check_helper_mem_access(env, regno - 1, 0,
1179                                                               zero_size_allowed,
1180                                                               meta);
1181                                 if (err)
1182                                         return err;
1183                         }
1184 
1185                         if (reg->max_value == BPF_REGISTER_MAX_RANGE) {
1186                                 verbose("R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n",
1187                                         regno);
1188                                 return -EACCES;
1189                         }
1190                         err = check_helper_mem_access(env, regno - 1,
1191                                                       reg->max_value,
1192                                                       zero_size_allowed, meta);
1193                         if (err)
1194                                 return err;
1195                 } else {
1196                         /* register is CONST_IMM */
1197                         err = check_helper_mem_access(env, regno - 1, reg->imm,
1198                                                       zero_size_allowed, meta);
1199                 }
1200         }
1201 
1202         return err;
1203 err_type:
1204         verbose("R%d type=%s expected=%s\n", regno,
1205                 reg_type_str[type], reg_type_str[expected_type]);
1206         return -EACCES;
1207 }
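
/* A sketch of the buf/len pairing handled above, using bpf_probe_read() as
 * an example (a tracing helper whose proto in this era is, per
 * kernel/trace/bpf_trace.c: ARG_PTR_TO_UNINIT_MEM, ARG_CONST_SIZE,
 * ARG_ANYTHING):
 *
 *	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -16),	// R1 = buf on the stack
 *	BPF_MOV64_IMM(BPF_REG_2, 16),		// R2 = len (CONST_IMM)
 *	BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),	// R3 = some address (ARG_ANYTHING)
 *	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read),
 *
 * While checking R2, check_helper_mem_access() verifies that [R1, R1 + 16)
 * lies within the stack; since arg1 is ARG_PTR_TO_UNINIT_MEM (raw_mode), the
 * buffer does not have to be initialized beforehand.
 */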
1208 
1209 static int check_map_func_compatibility(struct bpf_map *map, int func_id)
1210 {
1211         if (!map)
1212                 return 0;
1213 
1214         /* We need a two-way check: first from the map's perspective ... */
1215         switch (map->map_type) {
1216         case BPF_MAP_TYPE_PROG_ARRAY:
1217                 if (func_id != BPF_FUNC_tail_call)
1218                         goto error;
1219                 break;
1220         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
1221                 if (func_id != BPF_FUNC_perf_event_read &&
1222                     func_id != BPF_FUNC_perf_event_output)
1223                         goto error;
1224                 break;
1225         case BPF_MAP_TYPE_STACK_TRACE:
1226                 if (func_id != BPF_FUNC_get_stackid)
1227                         goto error;
1228                 break;
1229         case BPF_MAP_TYPE_CGROUP_ARRAY:
1230                 if (func_id != BPF_FUNC_skb_under_cgroup &&
1231                     func_id != BPF_FUNC_current_task_under_cgroup)
1232                         goto error;
1233                 break;
1234         default:
1235                 break;
1236         }
1237 
1238         /* ... and second from the function itself. */
1239         switch (func_id) {
1240         case BPF_FUNC_tail_call:
1241                 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
1242                         goto error;
1243                 break;
1244         case BPF_FUNC_perf_event_read:
1245         case BPF_FUNC_perf_event_output:
1246                 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY)
1247                         goto error;
1248                 break;
1249         case BPF_FUNC_get_stackid:
1250                 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
1251                         goto error;
1252                 break;
1253         case BPF_FUNC_current_task_under_cgroup:
1254         case BPF_FUNC_skb_under_cgroup:
1255                 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY)
1256                         goto error;
1257                 break;
1258         default:
1259                 break;
1260         }
1261 
1262         return 0;
1263 error:
1264         verbose("cannot pass map_type %d into func %s#%d\n",
1265                 map->map_type, func_id_name(func_id), func_id);
1266         return -EINVAL;
1267 }
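/* Illustrative sketch of the two-way check above: mismatches are caught
 * in both directions. Passing a BPF_MAP_TYPE_HASH map to bpf_tail_call()
 * trips the second switch, since that helper demands a prog array, while
 * calling bpf_map_lookup_elem() on a BPF_MAP_TYPE_PROG_ARRAY trips the
 * first, since that map type only admits bpf_tail_call():
 *
 *   r1 = ctx
 *   r2 = &hash_map            // BPF_LD_IMM64 with BPF_PSEUDO_MAP_FD
 *   r3 = 0
 *   call bpf_tail_call        // "cannot pass map_type ... into func ..."
 */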
1268 
1269 static int check_raw_mode(const struct bpf_func_proto *fn)
1270 {
1271         int count = 0;
1272 
1273         if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM)
1274                 count++;
1275         if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM)
1276                 count++;
1277         if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM)
1278                 count++;
1279         if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM)
1280                 count++;
1281         if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM)
1282                 count++;
1283 
1284         return count > 1 ? -EINVAL : 0;
1285 }
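/* For example, bpf_probe_read() declares its destination buffer as
 * ARG_PTR_TO_UNINIT_MEM: the helper itself fills the memory, so the
 * verifier does not demand that it be initialized beforehand. A
 * hypothetical proto declaring two ARG_PTR_TO_UNINIT_MEM args would be
 * rejected above, because struct bpf_call_arg_meta can only describe a
 * single raw-mode access (one regno/access_size pair).
 */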
1286 
1287 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
1288 {
1289         struct bpf_verifier_state *state = &env->cur_state;
1290         struct bpf_reg_state *regs = state->regs, *reg;
1291         int i;
1292 
1293         for (i = 0; i < MAX_BPF_REG; i++)
1294                 if (regs[i].type == PTR_TO_PACKET ||
1295                     regs[i].type == PTR_TO_PACKET_END)
1296                         mark_reg_unknown_value(regs, i);
1297 
1298         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
1299                 if (state->stack_slot_type[i] != STACK_SPILL)
1300                         continue;
1301                 reg = &state->spilled_regs[i / BPF_REG_SIZE];
1302                 if (reg->type != PTR_TO_PACKET &&
1303                     reg->type != PTR_TO_PACKET_END)
1304                         continue;
1305                 reg->type = UNKNOWN_VALUE;
1306                 reg->imm = 0;
1307         }
1308 }
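/* Illustrative sketch of why this is needed: a helper that rewrites
 * packet data may also reallocate it, so pointers derived from an
 * earlier data/data_end check must be re-derived afterwards:
 *
 *   r2 = skb->data; r3 = skb->data_end
 *   r4 = r2; r4 += 8
 *   if (r4 > r3) goto out
 *   call bpf_skb_store_bytes   // bpf_helper_changes_pkt_data() == true
 *   r0 = *(u8 *)(r2 + 0)       // rejected: r2 is UNKNOWN_VALUE now
 */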
1309 
1310 static int check_call(struct bpf_verifier_env *env, int func_id)
1311 {
1312         struct bpf_verifier_state *state = &env->cur_state;
1313         const struct bpf_func_proto *fn = NULL;
1314         struct bpf_reg_state *regs = state->regs;
1315         struct bpf_reg_state *reg;
1316         struct bpf_call_arg_meta meta;
1317         bool changes_data;
1318         int i, err;
1319 
1320         /* find function prototype */
1321         if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
1322                 verbose("invalid func %s#%d\n", func_id_name(func_id), func_id);
1323                 return -EINVAL;
1324         }
1325 
1326         if (env->prog->aux->ops->get_func_proto)
1327                 fn = env->prog->aux->ops->get_func_proto(func_id);
1328 
1329         if (!fn) {
1330                 verbose("unknown func %s#%d\n", func_id_name(func_id), func_id);
1331                 return -EINVAL;
1332         }
1333 
1334         /* eBPF programs must be GPL compatible to use GPL-ed functions */
1335         if (!env->prog->gpl_compatible && fn->gpl_only) {
1336                 verbose("cannot call GPL only function from proprietary program\n");
1337                 return -EINVAL;
1338         }
1339 
1340         changes_data = bpf_helper_changes_pkt_data(fn->func);
1341 
1342         memset(&meta, 0, sizeof(meta));
1343         meta.pkt_access = fn->pkt_access;
1344 
1345         /* We only support one arg being in raw mode at the moment, which
1346          * is sufficient for the helper functions we have right now.
1347          */
1348         err = check_raw_mode(fn);
1349         if (err) {
1350                 verbose("kernel subsystem misconfigured func %s#%d\n",
1351                         func_id_name(func_id), func_id);
1352                 return err;
1353         }
1354 
1355         /* check args */
1356         err = check_func_arg(env, BPF_REG_1, fn->arg1_type, &meta);
1357         if (err)
1358                 return err;
1359         err = check_func_arg(env, BPF_REG_2, fn->arg2_type, &meta);
1360         if (err)
1361                 return err;
1362         err = check_func_arg(env, BPF_REG_3, fn->arg3_type, &meta);
1363         if (err)
1364                 return err;
1365         err = check_func_arg(env, BPF_REG_4, fn->arg4_type, &meta);
1366         if (err)
1367                 return err;
1368         err = check_func_arg(env, BPF_REG_5, fn->arg5_type, &meta);
1369         if (err)
1370                 return err;
1371 
1372         /* Mark slots with STACK_MISC in case of raw mode; the stack offset
1373          * is inferred from the register state.
1374          */
1375         for (i = 0; i < meta.access_size; i++) {
1376                 err = check_mem_access(env, meta.regno, i, BPF_B, BPF_WRITE, -1);
1377                 if (err)
1378                         return err;
1379         }
1380 
1381         /* reset caller saved regs */
1382         for (i = 0; i < CALLER_SAVED_REGS; i++) {
1383                 reg = regs + caller_saved[i];
1384                 reg->type = NOT_INIT;
1385                 reg->imm = 0;
1386         }
1387 
1388         /* update return register */
1389         if (fn->ret_type == RET_INTEGER) {
1390                 regs[BPF_REG_0].type = UNKNOWN_VALUE;
1391         } else if (fn->ret_type == RET_VOID) {
1392                 regs[BPF_REG_0].type = NOT_INIT;
1393         } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) {
1394                 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL;
1395                 regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0;
1396                 /* remember map_ptr, so that check_map_access()
1397                  * can check 'value_size' boundary of memory access
1398                  * to map element returned from bpf_map_lookup_elem()
1399                  */
1400                 if (meta.map_ptr == NULL) {
1401                         verbose("kernel subsystem misconfigured verifier\n");
1402                         return -EINVAL;
1403                 }
1404                 regs[BPF_REG_0].map_ptr = meta.map_ptr;
1405                 regs[BPF_REG_0].id = ++env->id_gen;
1406         } else {
1407                 verbose("unknown return type %d of func %s#%d\n",
1408                         fn->ret_type, func_id_name(func_id), func_id);
1409                 return -EINVAL;
1410         }
1411 
1412         err = check_map_func_compatibility(meta.map_ptr, func_id);
1413         if (err)
1414                 return err;
1415 
1416         if (changes_data)
1417                 clear_all_pkt_pointers(env);
1418         return 0;
1419 }
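/* Illustrative sketch of the RET_PTR_TO_MAP_VALUE_OR_NULL case above:
 *
 *   call bpf_map_lookup_elem   // r0 = map_value_or_null(id=N)
 *   r1 = *(u64 *)(r0 + 0)      // rejected until a NULL check is done
 *
 * the fresh 'id' assigned here is what later lets mark_map_regs()
 * convert every copy of r0 at once when the program tests r0 == 0.
 */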
1420 
1421 static int check_packet_ptr_add(struct bpf_verifier_env *env,
1422                                 struct bpf_insn *insn)
1423 {
1424         struct bpf_reg_state *regs = env->cur_state.regs;
1425         struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
1426         struct bpf_reg_state *src_reg = &regs[insn->src_reg];
1427         struct bpf_reg_state tmp_reg;
1428         s32 imm;
1429 
1430         if (BPF_SRC(insn->code) == BPF_K) {
1431                 /* pkt_ptr += imm */
1432                 imm = insn->imm;
1433 
1434 add_imm:
1435                 if (imm < 0) {
1436                         verbose("addition of negative constant to packet pointer is not allowed\n");
1437                         return -EACCES;
1438                 }
1439                 if (imm >= MAX_PACKET_OFF ||
1440                     imm + dst_reg->off >= MAX_PACKET_OFF) {
1441                         verbose("constant %d is too large to add to packet pointer\n",
1442                                 imm);
1443                         return -EACCES;
1444                 }
1445                 /* a constant was added to pkt_ptr.
1446                  * Remember it while keeping the same 'id'
1447                  */
1448                 dst_reg->off += imm;
1449         } else {
1450                 if (src_reg->type == PTR_TO_PACKET) {
1451                         /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */
1452                         tmp_reg = *dst_reg;  /* save r7 state */
1453                         *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */
1454                         src_reg = &tmp_reg;  /* pretend it's src_reg state */
1455                         /* if the checks below reject it, the copy won't matter,
1456                          * since we're rejecting the whole program. If all ok,
1457                          * then imm22 state will be added to r7
1458                          * and r7 will be pkt(id=0,off=22,r=62) while
1459                          * r6 will stay as pkt(id=0,off=0,r=62)
1460                          */
1461                 }
1462 
1463                 if (src_reg->type == CONST_IMM) {
1464                         /* pkt_ptr += reg where reg is known constant */
1465                         imm = src_reg->imm;
1466                         goto add_imm;
1467                 }
1468                 /* disallow pkt_ptr += reg
1469                  * if reg is not unknown_value with guaranteed zero upper bits
1470                  * otherwise pkt_ptr may overflow and addition will become
1471                  * subtraction which is not allowed
1472                  */
1473                 if (src_reg->type != UNKNOWN_VALUE) {
1474                         verbose("cannot add '%s' to ptr_to_packet\n",
1475                                 reg_type_str[src_reg->type]);
1476                         return -EACCES;
1477                 }
1478                 if (src_reg->imm < 48) {
1479                         verbose("cannot add integer value with %lld upper zero bits to ptr_to_packet\n",
1480                                 src_reg->imm);
1481                         return -EACCES;
1482                 }
1483                 /* dst_reg stays as pkt_ptr type and since some positive
1484                  * integer value was added to the pointer, increment its 'id'
1485                  */
1486                 dst_reg->id = ++env->id_gen;
1487 
1488                 /* something was added to pkt_ptr, set range and off to zero */
1489                 dst_reg->off = 0;
1490                 dst_reg->range = 0;
1491         }
1492         return 0;
1493 }
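/* Illustrative sketch of the rules above (each rejected line would by
 * itself fail the whole program; the imm tracking on r4 assumes
 * env->allow_ptr_leaks, see evaluate_reg_alu() below):
 *
 *   r2 = pkt(id=0,off=0,r=8)
 *   r2 += 14                   // ok: off becomes 14
 *   r2 += -2                   // rejected: negative constant
 *   r4 = unknown (imm=0)
 *   r2 += r4                   // rejected: fewer than 48 zero upper bits
 *   r4 &= 0xff                 // now imm == 56 known-zero upper bits
 *   r2 += r4                   // ok: r2 gets a fresh id, off/range reset
 */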
1494 
1495 static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
1496 {
1497         struct bpf_reg_state *regs = env->cur_state.regs;
1498         struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
1499         u8 opcode = BPF_OP(insn->code);
1500         s64 imm_log2;
1501 
1502         /* for type == UNKNOWN_VALUE:
1503          * imm > 0 -> number of zero upper bits
1504          * imm == 0 -> don't track, which is the same as saying all bits can be non-zero
1505          */
1506 
1507         if (BPF_SRC(insn->code) == BPF_X) {
1508                 struct bpf_reg_state *src_reg = &regs[insn->src_reg];
1509 
1510                 if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 &&
1511                     dst_reg->imm && opcode == BPF_ADD) {
1512                         /* dreg += sreg
1513                          * where both have zero upper bits. Adding them
1514                          * can only result in making one more bit non-zero
1515                          * in the larger value.
1516                          * Ex. 0xffff (imm=48) + 1 (imm=63) = 0x10000 (imm=47)
1517                          *     0xffff (imm=48) + 0xffff = 0x1fffe (imm=47)
1518                          */
1519                         dst_reg->imm = min(dst_reg->imm, src_reg->imm);
1520                         dst_reg->imm--;
1521                         return 0;
1522                 }
1523                 if (src_reg->type == CONST_IMM && src_reg->imm > 0 &&
1524                     dst_reg->imm && opcode == BPF_ADD) {
1525                         /* dreg += sreg
1526                          * where dreg has zero upper bits and sreg is const.
1527                          * Adding them can only result in making one more bit
1528                          * non-zero in the larger value.
1529                          */
1530                         imm_log2 = __ilog2_u64((long long)src_reg->imm);
1531                         dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
1532                         dst_reg->imm--;
1533                         return 0;
1534                 }
1535                 /* all other cases not supported yet, just mark dst_reg */
1536                 dst_reg->imm = 0;
1537                 return 0;
1538         }
1539 
1540         /* sign extend 32-bit imm into 64-bit to make sure that
1541          * negative values occupy bit 63. Note ilog2() would have
1542          * been incorrect, since sizeof(insn->imm) == 4
1543          */
1544         imm_log2 = __ilog2_u64((long long)insn->imm);
1545 
1546         if (dst_reg->imm && opcode == BPF_LSH) {
1547                 /* reg <<= imm
1548                  * if reg was a result of 2 byte load, then its imm == 48
1549                  * which means that upper 48 bits are zero and shifting this reg
1550                  * left by 4 would mean that upper 44 bits are still zero
1551                  */
1552                 dst_reg->imm -= insn->imm;
1553         } else if (dst_reg->imm && opcode == BPF_MUL) {
1554                 /* reg *= imm
1555                  * if multiplying by 14 subtract 4
1556                  * e.g. if multiplying by 14, subtract 4 (imm_log2 + 1 below)
1557                  * This is a conservative calculation of upper zero bits.
1558                  * It's not trying to special case the insn->imm == 1 or 0 cases
1559                 dst_reg->imm -= imm_log2 + 1;
1560         } else if (opcode == BPF_AND) {
1561                 /* reg &= imm */
1562                 dst_reg->imm = 63 - imm_log2;
1563         } else if (dst_reg->imm && opcode == BPF_ADD) {
1564                 /* reg += imm */
1565                 dst_reg->imm = min(dst_reg->imm, 63 - imm_log2);
1566                 dst_reg->imm--;
1567         } else if (opcode == BPF_RSH) {
1568                 /* reg >>= imm
1569                  * which means that after right shift, upper bits will be zero
1570                  * note that verifier already checked that
1571                  * 0 <= imm < 64 for shift insn
1572                  */
1573                 dst_reg->imm += insn->imm;
1574                 if (unlikely(dst_reg->imm > 64))
1575                         /* some dumb code did:
1576                          * r2 = *(u32 *)mem;
1577                          * r2 >>= 32;
1578                          * and all bits are zero now */
1579                         dst_reg->imm = 64;
1580         } else {
1581                 /* all other alu ops mean that we don't know what will
1582                  * happen to the value; mark it with an unknown number of zero bits
1583                  */
1584                 dst_reg->imm = 0;
1585         }
1586 
1587         if (dst_reg->imm < 0) {
1588                 /* all 64 bits of the register can contain non-zero bits
1589                  * and such value cannot be added to ptr_to_packet, since it
1590                  * may overflow, mark it as unknown to avoid further eval
1591                  */
1592                 dst_reg->imm = 0;
1593         }
1594         return 0;
1595 }
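/* Illustrative sketch of the zero-upper-bits tracking above (reached
 * from check_alu_op() below when env->allow_ptr_leaks is set): a 2-byte
 * load leaves imm == 48, and ALU ops shrink the count conservatively:
 *
 *   r2 = *(u16 *)mem           // imm = 48
 *   r2 <<= 2                   // BPF_LSH: imm = 48 - 2 = 46
 *   r2 += 4                    // BPF_ADD: imm = min(46, 63 - 2) - 1 = 45
 *
 * since 45 < 48, this r2 can no longer be added to a packet pointer.
 */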
1596 
1597 static int evaluate_reg_imm_alu(struct bpf_verifier_env *env,
1598                                 struct bpf_insn *insn)
1599 {
1600         struct bpf_reg_state *regs = env->cur_state.regs;
1601         struct bpf_reg_state *dst_reg = &regs[insn->dst_reg];
1602         struct bpf_reg_state *src_reg = &regs[insn->src_reg];
1603         u8 opcode = BPF_OP(insn->code);
1604         u64 dst_imm = dst_reg->imm;
1605 
1606         /* dst_reg->type == CONST_IMM here. Simulate execution of insns
1607          * containing ALU ops. Don't care about overflow or negative
1608          * values, just add/sub/... them; registers are in u64.
1609          */
1610         if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_K) {
1611                 dst_imm += insn->imm;
1612         } else if (opcode == BPF_ADD && BPF_SRC(insn->code) == BPF_X &&
1613                    src_reg->type == CONST_IMM) {
1614                 dst_imm += src_reg->imm;
1615         } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_K) {
1616                 dst_imm -= insn->imm;
1617         } else if (opcode == BPF_SUB && BPF_SRC(insn->code) == BPF_X &&
1618                    src_reg->type == CONST_IMM) {
1619                 dst_imm -= src_reg->imm;
1620         } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_K) {
1621                 dst_imm *= insn->imm;
1622         } else if (opcode == BPF_MUL && BPF_SRC(insn->code) == BPF_X &&
1623                    src_reg->type == CONST_IMM) {
1624                 dst_imm *= src_reg->imm;
1625         } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_K) {
1626                 dst_imm |= insn->imm;
1627         } else if (opcode == BPF_OR && BPF_SRC(insn->code) == BPF_X &&
1628                    src_reg->type == CONST_IMM) {
1629                 dst_imm |= src_reg->imm;
1630         } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_K) {
1631                 dst_imm &= insn->imm;
1632         } else if (opcode == BPF_AND && BPF_SRC(insn->code) == BPF_X &&
1633                    src_reg->type == CONST_IMM) {
1634                 dst_imm &= src_reg->imm;
1635         } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_K) {
1636                 dst_imm >>= insn->imm;
1637         } else if (opcode == BPF_RSH && BPF_SRC(insn->code) == BPF_X &&
1638                    src_reg->type == CONST_IMM) {
1639                 dst_imm >>= src_reg->imm;
1640         } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_K) {
1641                 dst_imm <<= insn->imm;
1642         } else if (opcode == BPF_LSH && BPF_SRC(insn->code) == BPF_X &&
1643                    src_reg->type == CONST_IMM) {
1644                 dst_imm <<= src_reg->imm;
1645         } else {
1646                 mark_reg_unknown_value(regs, insn->dst_reg);
1647                 goto out;
1648         }
1649 
1650         dst_reg->imm = dst_imm;
1651 out:
1652         return 0;
1653 }
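/* Illustrative sketch: CONST_IMM registers are constant-folded through
 * the opcodes listed above (reached from check_alu_op() below when
 * env->allow_ptr_leaks is set):
 *
 *   r1 = 6                     // CONST_IMM, imm = 6
 *   r1 <<= 3                   // still CONST_IMM, imm = 48
 *   r1 *= 2                    // still CONST_IMM, imm = 96
 *
 * any other opcode (e.g. BPF_XOR) falls into the final else and marks
 * the register as UNKNOWN_VALUE instead.
 */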
1654 
1655 static void check_reg_overflow(struct bpf_reg_state *reg)
1656 {
1657         if (reg->max_value > BPF_REGISTER_MAX_RANGE)
1658                 reg->max_value = BPF_REGISTER_MAX_RANGE;
1659         if (reg->min_value < BPF_REGISTER_MIN_RANGE ||
1660             reg->min_value > BPF_REGISTER_MAX_RANGE)
1661                 reg->min_value = BPF_REGISTER_MIN_RANGE;
1662 }
1663 
1664 static void adjust_reg_min_max_vals(struct bpf_verifier_env *env,
1665                                     struct bpf_insn *insn)
1666 {
1667         struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
1668         s64 min_val = BPF_REGISTER_MIN_RANGE;
1669         u64 max_val = BPF_REGISTER_MAX_RANGE;
1670         u8 opcode = BPF_OP(insn->code);
1671 
1672         dst_reg = &regs[insn->dst_reg];
1673         if (BPF_SRC(insn->code) == BPF_X) {
1674                 check_reg_overflow(&regs[insn->src_reg]);
1675                 min_val = regs[insn->src_reg].min_value;
1676                 max_val = regs[insn->src_reg].max_value;
1677 
1678                 /* If the source register is a random pointer then the
1679                  * min_value/max_value values represent the range of the known
1680                  * accesses into that value, not the actual min/max value of the
1681                  * register itself.  In this case we have to reset the reg range
1682                  * values so we know it is not safe to look at.
1683                  */
1684                 if (regs[insn->src_reg].type != CONST_IMM &&
1685                     regs[insn->src_reg].type != UNKNOWN_VALUE) {
1686                         min_val = BPF_REGISTER_MIN_RANGE;
1687                         max_val = BPF_REGISTER_MAX_RANGE;
1688                 }
1689         } else if (insn->imm < BPF_REGISTER_MAX_RANGE &&
1690                    (s64)insn->imm > BPF_REGISTER_MIN_RANGE) {
1691                 min_val = max_val = insn->imm;
1692         }
1693 
1694         /* We don't know anything about what was done to this register, mark it
1695          * as unknown.
1696          */
1697         if (min_val == BPF_REGISTER_MIN_RANGE &&
1698             max_val == BPF_REGISTER_MAX_RANGE) {
1699                 reset_reg_range_values(regs, insn->dst_reg);
1700                 return;
1701         }
1702 
1703         /* If one of our values was at the end of our ranges then we can't just
1704          * do our normal operations to the register, we need to set the values
1705          * to the min/max since they are undefined.
1706          */
1707         if (min_val == BPF_REGISTER_MIN_RANGE)
1708                 dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
1709         if (max_val == BPF_REGISTER_MAX_RANGE)
1710                 dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
1711 
1712         switch (opcode) {
1713         case BPF_ADD:
1714                 if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
1715                         dst_reg->min_value += min_val;
1716                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
1717                         dst_reg->max_value += max_val;
1718                 break;
1719         case BPF_SUB:
1720                 if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
1721                         dst_reg->min_value -= min_val;
1722                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
1723                         dst_reg->max_value -= max_val;
1724                 break;
1725         case BPF_MUL:
1726                 if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
1727                         dst_reg->min_value *= min_val;
1728                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
1729                         dst_reg->max_value *= max_val;
1730                 break;
1731         case BPF_AND:
1732                 /* Disallow AND'ing of negative numbers, ain't nobody got time
1733                  * for that.  Otherwise the minimum is 0 and the max is the max
1734                  * value we could AND against.
1735                  */
1736                 if (min_val < 0)
1737                         dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
1738                 else
1739                         dst_reg->min_value = 0;
1740                 dst_reg->max_value = max_val;
1741                 break;
1742         case BPF_LSH:
1743                 /* Gotta have special overflow logic here, if we're shifting
1744                  * more than MAX_RANGE then just assume we have an invalid
1745                  * range.
1746                  */
1747                 if (min_val > ilog2(BPF_REGISTER_MAX_RANGE))
1748                         dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
1749                 else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE)
1750                         dst_reg->min_value <<= min_val;
1751 
1752                 if (max_val > ilog2(BPF_REGISTER_MAX_RANGE))
1753                         dst_reg->max_value = BPF_REGISTER_MAX_RANGE;
1754                 else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
1755                         dst_reg->max_value <<= max_val;
1756                 break;
1757         case BPF_RSH:
1758                 /* RSH by a negative number is undefined, and the BPF_RSH is an
1759                  * unsigned shift, so make the appropriate casts.
1760                  */
1761                 if (min_val < 0 || dst_reg->min_value < 0)
1762                         dst_reg->min_value = BPF_REGISTER_MIN_RANGE;
1763                 else
1764                         dst_reg->min_value =
1765                                 (u64)(dst_reg->min_value) >> min_val;
1766                 if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE)
1767                         dst_reg->max_value >>= max_val;
1768                 break;
1769         default:
1770                 reset_reg_range_values(regs, insn->dst_reg);
1771                 break;
1772         }
1773 
1774         check_reg_overflow(dst_reg);
1775 }
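/* Illustrative sketch of the min/max tracking above:
 *
 *   r1 = 16                    // min = max = 16
 *   r1 += 4                    // BPF_ADD: min = max = 20
 *   r1 &= 63                   // BPF_AND: min = 0, max = 63
 *   r1 <<= 2                   // BPF_LSH: min = 0, max = 252
 *
 * an opcode outside the switch (e.g. BPF_DIV) resets the range instead.
 */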
1776 
1777 /* check validity of 32-bit and 64-bit arithmetic operations */
1778 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
1779 {
1780         struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg;
1781         u8 opcode = BPF_OP(insn->code);
1782         int err;
1783 
1784         if (opcode == BPF_END || opcode == BPF_NEG) {
1785                 if (opcode == BPF_NEG) {
1786                         if (BPF_SRC(insn->code) != 0 ||
1787                             insn->src_reg != BPF_REG_0 ||
1788                             insn->off != 0 || insn->imm != 0) {
1789                                 verbose("BPF_NEG uses reserved fields\n");
1790                                 return -EINVAL;
1791                         }
1792                 } else {
1793                         if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
1794                             (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
1795                                 verbose("BPF_END uses reserved fields\n");
1796                                 return -EINVAL;
1797                         }
1798                 }
1799 
1800                 /* check src operand */
1801                 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1802                 if (err)
1803                         return err;
1804 
1805                 if (is_pointer_value(env, insn->dst_reg)) {
1806                         verbose("R%d pointer arithmetic prohibited\n",
1807                                 insn->dst_reg);
1808                         return -EACCES;
1809                 }
1810 
1811                 /* check dest operand */
1812                 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
1813                 if (err)
1814                         return err;
1815 
1816         } else if (opcode == BPF_MOV) {
1817 
1818                 if (BPF_SRC(insn->code) == BPF_X) {
1819                         if (insn->imm != 0 || insn->off != 0) {
1820                                 verbose("BPF_MOV uses reserved fields\n");
1821                                 return -EINVAL;
1822                         }
1823 
1824                         /* check src operand */
1825                         err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1826                         if (err)
1827                                 return err;
1828                 } else {
1829                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
1830                                 verbose("BPF_MOV uses reserved fields\n");
1831                                 return -EINVAL;
1832                         }
1833                 }
1834 
1835                 /* check dest operand */
1836                 err = check_reg_arg(regs, insn->dst_reg, DST_OP);
1837                 if (err)
1838                         return err;
1839 
1840                 /* we are setting our register to something new, we need to
1841                  * reset its range values.
1842                  */
1843                 reset_reg_range_values(regs, insn->dst_reg);
1844 
1845                 if (BPF_SRC(insn->code) == BPF_X) {
1846                         if (BPF_CLASS(insn->code) == BPF_ALU64) {
1847                                 /* case: R1 = R2
1848                                  * copy register state to dest reg
1849                                  */
1850                                 regs[insn->dst_reg] = regs[insn->src_reg];
1851                         } else {
1852                                 if (is_pointer_value(env, insn->src_reg)) {
1853                                         verbose("R%d partial copy of pointer\n",
1854                                                 insn->src_reg);
1855                                         return -EACCES;
1856                                 }
1857                                 mark_reg_unknown_value(regs, insn->dst_reg);
1858                         }
1859                 } else {
1860                         /* case: R = imm
1861                          * remember the value we stored into this reg
1862                          */
1863                         regs[insn->dst_reg].type = CONST_IMM;
1864                         regs[insn->dst_reg].imm = insn->imm;
1865                         regs[insn->dst_reg].max_value = insn->imm;
1866                         regs[insn->dst_reg].min_value = insn->imm;
1867                 }
1868 
1869         } else if (opcode > BPF_END) {
1870                 verbose("invalid BPF_ALU opcode %x\n", opcode);
1871                 return -EINVAL;
1872 
1873         } else {        /* all other ALU ops: and, sub, xor, add, ... */
1874 
1875                 if (BPF_SRC(insn->code) == BPF_X) {
1876                         if (insn->imm != 0 || insn->off != 0) {
1877                                 verbose("BPF_ALU uses reserved fields\n");
1878                                 return -EINVAL;
1879                         }
1880                         /* check src1 operand */
1881                         err = check_reg_arg(regs, insn->src_reg, SRC_OP);
1882                         if (err)
1883                                 return err;
1884                 } else {
1885                         if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
1886                                 verbose("BPF_ALU uses reserved fields\n");
1887                                 return -EINVAL;
1888                         }
1889                 }
1890 
1891                 /* check src2 operand */
1892                 err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
1893                 if (err)
1894                         return err;
1895 
1896                 if ((opcode == BPF_MOD || opcode == BPF_DIV) &&
1897                     BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
1898                         verbose("div by zero\n");
1899                         return -EINVAL;
1900                 }
1901 
1902                 if ((opcode == BPF_LSH || opcode == BPF_RSH ||
1903                      opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
1904                         int size = BPF_CLASS(insn->code) == BPF_ALU64 ? 64 : 32;
1905 
1906                         if (insn->imm < 0 || insn->imm >= size) {
1907                                 verbose("invalid shift %d\n", insn->imm);
1908                                 return -EINVAL;
1909                         }
1910                 }
1911 
1912                 /* check dest operand */
1913                 err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
1914                 if (err)
1915                         return err;
1916 
1917                 dst_reg = &regs[insn->dst_reg];
1918 
1919                 /* first we want to adjust our ranges. */
1920                 adjust_reg_min_max_vals(env, insn);
1921 
1922                 /* pattern match 'bpf_add Rx, imm' instruction */
1923                 if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 &&
1924                     dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) {
1925                         dst_reg->type = PTR_TO_STACK;
1926                         dst_reg->imm = insn->imm;
1927                         return 0;
1928                 } else if (opcode == BPF_ADD &&
1929                            BPF_CLASS(insn->code) == BPF_ALU64 &&
1930                            dst_reg->type == PTR_TO_STACK &&
1931                            ((BPF_SRC(insn->code) == BPF_X &&
1932                              regs[insn->src_reg].type == CONST_IMM) ||
1933                             BPF_SRC(insn->code) == BPF_K)) {
1934                         if (BPF_SRC(insn->code) == BPF_X)
1935                                 dst_reg->imm += regs[insn->src_reg].imm;
1936                         else
1937                                 dst_reg->imm += insn->imm;
1938                         return 0;
1939                 } else if (opcode == BPF_ADD &&
1940                            BPF_CLASS(insn->code) == BPF_ALU64 &&
1941                            (dst_reg->type == PTR_TO_PACKET ||
1942                             (BPF_SRC(insn->code) == BPF_X &&
1943                              regs[insn->src_reg].type == PTR_TO_PACKET))) {
1944                         /* ptr_to_packet += K|X */
1945                         return check_packet_ptr_add(env, insn);
1946                 } else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
1947                            dst_reg->type == UNKNOWN_VALUE &&
1948                            env->allow_ptr_leaks) {
1949                         /* unknown += K|X */
1950                         return evaluate_reg_alu(env, insn);
1951                 } else if (BPF_CLASS(insn->code) == BPF_ALU64 &&
1952                            dst_reg->type == CONST_IMM &&
1953                            env->allow_ptr_leaks) {
1954                         /* reg_imm += K|X */
1955                         return evaluate_reg_imm_alu(env, insn);
1956                 } else if (is_pointer_value(env, insn->dst_reg)) {
1957                         verbose("R%d pointer arithmetic prohibited\n",
1958                                 insn->dst_reg);
1959                         return -EACCES;
1960                 } else if (BPF_SRC(insn->code) == BPF_X &&
1961                            is_pointer_value(env, insn->src_reg)) {
1962                         verbose("R%d pointer arithmetic prohibited\n",
1963                                 insn->src_reg);
1964                         return -EACCES;
1965                 }
1966 
1967                 /* If we did pointer math on a map value then just set it to our
1968                  * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or
1969                  * loads to this register appropriately, otherwise just mark the
1970                  * register as unknown.
1971                  */
1972                 if (env->allow_ptr_leaks &&
1973                     BPF_CLASS(insn->code) == BPF_ALU64 && opcode == BPF_ADD &&
1974                     (dst_reg->type == PTR_TO_MAP_VALUE ||
1975                      dst_reg->type == PTR_TO_MAP_VALUE_ADJ))
1976                         dst_reg->type = PTR_TO_MAP_VALUE_ADJ;
1977                 else
1978                         mark_reg_unknown_value(regs, insn->dst_reg);
1979         }
1980 
1981         return 0;
1982 }
1983 
1984 static void find_good_pkt_pointers(struct bpf_verifier_state *state,
1985                                    struct bpf_reg_state *dst_reg)
1986 {
1987         struct bpf_reg_state *regs = state->regs, *reg;
1988         int i;
1989 
1990         /* LLVM can generate two kind of checks:
1991          *
1992          * Type 1:
1993          *
1994          *   r2 = r3;
1995          *   r2 += 8;
1996          *   if (r2 > pkt_end) goto <handle exception>
1997          *   <access okay>
1998          *
1999          *   Where:
2000          *     r2 == dst_reg, pkt_end == src_reg
2001          *     r2=pkt(id=n,off=8,r=0)
2002          *     r3=pkt(id=n,off=0,r=0)
2003          *
2004          * Type 2:
2005          *
2006          *   r2 = r3;
2007          *   r2 += 8;
2008          *   if (pkt_end >= r2) goto <access okay>
2009          *   <handle exception>
2010          *
2011          *   Where:
2012          *     pkt_end == dst_reg, r2 == src_reg
2013          *     r2=pkt(id=n,off=8,r=0)
2014          *     r3=pkt(id=n,off=0,r=0)
2015          *
2016          * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
2017          * so that range of bytes [r3, r3 + 8) is safe to access.
2018          */
2019 
2020         for (i = 0; i < MAX_BPF_REG; i++)
2021                 if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id)
2022                         /* keep the maximum range already checked */
2023                         regs[i].range = max(regs[i].range, dst_reg->off);
2024 
2025         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
2026                 if (state->stack_slot_type[i] != STACK_SPILL)
2027                         continue;
2028                 reg = &state->spilled_regs[i / BPF_REG_SIZE];
2029                 if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id)
2030                         reg->range = max(reg->range, dst_reg->off);
2031         }
2032 }
2033 
2034 /* Adjusts the register min/max values in the case that the dst_reg is the
2035  * variable register that we are working on, and src_reg is a constant or we're
2036  * simply doing a BPF_K check.
2037  */
2038 static void reg_set_min_max(struct bpf_reg_state *true_reg,
2039                             struct bpf_reg_state *false_reg, u64 val,
2040                             u8 opcode)
2041 {
2042         switch (opcode) {
2043         case BPF_JEQ:
2044                 /* If this is false then we know nothing Jon Snow, but if it is
2045                  * true then we know for sure.
2046                  */
2047                 true_reg->max_value = true_reg->min_value = val;
2048                 break;
2049         case BPF_JNE:
2050                 /* If this is true we know nothing Jon Snow, but if it is false
2051                  * we know the value for sure;
2052                  */
2053                 false_reg->max_value = false_reg->min_value = val;
2054                 break;
2055         case BPF_JGT:
2056                 /* Unsigned comparison, the minimum value is 0. */
2057                 false_reg->min_value = 0;
2058                 /* fallthrough */
2059         case BPF_JSGT:
2060                 /* If this is false then we know the maximum val is val,
2061                  * otherwise we know the min val is val+1.
2062                  */
2063                 false_reg->max_value = val;
2064                 true_reg->min_value = val + 1;
2065                 break;
2066         case BPF_JGE:
2067                 /* Unsigned comparison, the minimum value is 0. */
2068                 false_reg->min_value = 0;
2069                 /* fallthrough */
2070         case BPF_JSGE:
2071                 /* If this is false then we know the maximum value is val - 1,
2072                  * otherwise we know the minimum value is val.
2073                  */
2074                 false_reg->max_value = val - 1;
2075                 true_reg->min_value = val;
2076                 break;
2077         default:
2078                 break;
2079         }
2080 
2081         check_reg_overflow(false_reg);
2082         check_reg_overflow(true_reg);
2083 }
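/* Illustrative sketch: 'if (r1 > 10) goto l' on an unknown r1 splits
 * the state in two. The branch taken at 'l' (true_reg) learns
 * min_value = 11; the fall-through (false_reg) learns max_value = 10
 * and, because BPF_JGT is an unsigned compare, min_value = 0.
 */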
2084 
2085 /* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg
2086  * is the variable reg.
2087  */
2088 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg,
2089                                 struct bpf_reg_state *false_reg, u64 val,
2090                                 u8 opcode)
2091 {
2092         switch (opcode) {
2093         case BPF_JEQ:
2094                 /* If this is false then we know nothing Jon Snow, but if it is
2095                  * true then we know for sure.
2096                  */
2097                 true_reg->max_value = true_reg->min_value = val;
2098                 break;
2099         case BPF_JNE:
2100                 /* If this is true we know nothing Jon Snow, but if it is false
2101                  * we know the value for sure;
2102                  */
2103                 false_reg->max_value = false_reg->min_value = val;
2104                 break;
2105         case BPF_JGT:
2106                 /* Unsigned comparison, the minimum value is 0. */
2107                 true_reg->min_value = 0;
2108                 /* fallthrough */
2109         case BPF_JSGT:
2110                 /*
2111                  * If this is false then val <= the register; if it is
2112                  * true then the register < val, i.e. register <= val - 1.
2113                  */
2114                 false_reg->min_value = val;
2115                 true_reg->max_value = val - 1;
2116                 break;
2117         case BPF_JGE:
2118                 /* Unsigned comparison, the minimum value is 0. */
2119                 true_reg->min_value = 0;
2120                 /* fallthrough */
2121         case BPF_JSGE:
2122                 /* If this is false then the constant < register, if it is true
2123                  * then the register <= constant.
2124                  */
2125                 false_reg->min_value = val + 1;
2126                 true_reg->max_value = val;
2127                 break;
2128         default:
2129                 break;
2130         }
2131 
2132         check_reg_overflow(false_reg);
2133         check_reg_overflow(true_reg);
2134 }
2135 
2136 static void mark_map_reg(struct bpf_reg_state *regs, u32 regno, u32 id,
2137                          enum bpf_reg_type type)
2138 {
2139         struct bpf_reg_state *reg = &regs[regno];
2140 
2141         if (reg->type == PTR_TO_MAP_VALUE_OR_NULL && reg->id == id) {
2142                 reg->type = type;
2143                 /* We don't need the id from this point onwards anymore, so we
2144                  * had better reset it, so that state pruning has a chance
2145                  * to take effect.
2146                  */
2147                 reg->id = 0;
2148                 if (type == UNKNOWN_VALUE)
2149                         __mark_reg_unknown_value(regs, regno);
2150         }
2151 }
2152 
2153 /* The logic is similar to find_good_pkt_pointers(), both could eventually
2154  * be folded together at some point.
2155  */
2156 static void mark_map_regs(struct bpf_verifier_state *state, u32 regno,
2157                           enum bpf_reg_type type)
2158 {
2159         struct bpf_reg_state *regs = state->regs;
2160         u32 id = regs[regno].id;
2161         int i;
2162 
2163         for (i = 0; i < MAX_BPF_REG; i++)
2164                 mark_map_reg(regs, i, id, type);
2165 
2166         for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) {
2167                 if (state->stack_slot_type[i] != STACK_SPILL)
2168                         continue;
2169                 mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, type);
2170         }
2171 }
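/* Illustrative sketch: every register sharing the lookup's id flips
 * together:
 *
 *   call bpf_map_lookup_elem   // r0 = map_value_or_null(id=N)
 *   r6 = r0                    // r6 inherits id=N
 *   if (r0 == 0) goto out
 *   r1 = *(u64 *)(r6 + 0)      // ok: r6 became PTR_TO_MAP_VALUE as well
 */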
2172 
2173 static int check_cond_jmp_op(struct bpf_verifier_env *env,
2174                              struct bpf_insn *insn, int *insn_idx)
2175 {
2176         struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state;
2177         struct bpf_reg_state *regs = this_branch->regs, *dst_reg;
2178         u8 opcode = BPF_OP(insn->code);
2179         int err;
2180 
2181         if (opcode > BPF_EXIT) {
2182                 verbose("invalid BPF_JMP opcode %x\n", opcode);
2183                 return -EINVAL;
2184         }
2185 
2186         if (BPF_SRC(insn->code) == BPF_X) {
2187                 if (insn->imm != 0) {
2188                         verbose("BPF_JMP uses reserved fields\n");
2189                         return -EINVAL;
2190                 }
2191 
2192                 /* check src1 operand */
2193                 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
2194                 if (err)
2195                         return err;
2196 
2197                 if (is_pointer_value(env, insn->src_reg)) {
2198                         verbose("R%d pointer comparison prohibited\n",
2199                                 insn->src_reg);
2200                         return -EACCES;
2201                 }
2202         } else {
2203                 if (insn->src_reg != BPF_REG_0) {
2204                         verbose("BPF_JMP uses reserved fields\n");
2205                         return -EINVAL;
2206                 }
2207         }
2208 
2209         /* check src2 operand */
2210         err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
2211         if (err)
2212                 return err;
2213 
2214         dst_reg = &regs[insn->dst_reg];
2215 
2216         /* detect if R == 0 where R was initialized to zero earlier */
2217         if (BPF_SRC(insn->code) == BPF_K &&
2218             (opcode == BPF_JEQ || opcode == BPF_JNE) &&
2219             dst_reg->type == CONST_IMM && dst_reg->imm == insn->imm) {
2220                 if (opcode == BPF_JEQ) {
2221                         /* if (imm == imm) goto pc+off;
2222                          * only follow the goto, ignore fall-through
2223                          */
2224                         *insn_idx += insn->off;
2225                         return 0;
2226                 } else {
2227                         /* if (imm != imm) goto pc+off;
2228                          * only follow fall-through branch, since
2229                          * that's where the program will go
2230                          */
2231                         return 0;
2232                 }
2233         }
2234 
2235         other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx);
2236         if (!other_branch)
2237                 return -EFAULT;
2238 
2239         /* detect if we are comparing against a constant value so we can adjust
2240          * our min/max values for our dst register.
2241          */
2242         if (BPF_SRC(insn->code) == BPF_X) {
2243                 if (regs[insn->src_reg].type == CONST_IMM)
2244                         reg_set_min_max(&other_branch->regs[insn->dst_reg],
2245                                         dst_reg, regs[insn->src_reg].imm,
2246                                         opcode);
2247                 else if (dst_reg->type == CONST_IMM)
2248                         reg_set_min_max_inv(&other_branch->regs[insn->src_reg],
2249                                             &regs[insn->src_reg], dst_reg->imm,
2250                                             opcode);
2251         } else {
2252                 reg_set_min_max(&other_branch->regs[insn->dst_reg],
2253                                         dst_reg, insn->imm, opcode);
2254         }
2255 
2256         /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */
2257         if (BPF_SRC(insn->code) == BPF_K &&
2258             insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
2259             dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
2260                 /* Mark all identical map registers in each branch as either
2261                  * safe or unknown depending on the R == 0 or R != 0 conditional.
2262                  */
2263                 mark_map_regs(this_branch, insn->dst_reg,
2264                               opcode == BPF_JEQ ? PTR_TO_MAP_VALUE : UNKNOWN_VALUE);
2265                 mark_map_regs(other_branch, insn->dst_reg,
2266                               opcode == BPF_JEQ ? UNKNOWN_VALUE : PTR_TO_MAP_VALUE);
2267         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT &&
2268                    dst_reg->type == PTR_TO_PACKET &&
2269                    regs[insn->src_reg].type == PTR_TO_PACKET_END) {
2270                 find_good_pkt_pointers(this_branch, dst_reg);
2271         } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE &&
2272                    dst_reg->type == PTR_TO_PACKET_END &&
2273                    regs[insn->src_reg].type == PTR_TO_PACKET) {
2274                 find_good_pkt_pointers(other_branch, &regs[insn->src_reg]);
2275         } else if (is_pointer_value(env, insn->dst_reg)) {
2276                 verbose("R%d pointer comparison prohibited\n", insn->dst_reg);
2277                 return -EACCES;
2278         }
2279         if (log_level)
2280                 print_verifier_state(this_branch);
2281         return 0;
2282 }
2283 
2284 /* return the map pointer stored inside BPF_LD_IMM64 instruction */
2285 static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn)
2286 {
2287         u64 imm64 = ((u64) (u32) insn[0].imm) | ((u64) (u32) insn[1].imm) << 32;
2288 
2289         return (struct bpf_map *) (unsigned long) imm64;
2290 }
2291 
2292 /* verify BPF_LD_IMM64 instruction */
2293 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
2294 {
2295         struct bpf_reg_state *regs = env->cur_state.regs;
2296         int err;
2297 
2298         if (BPF_SIZE(insn->code) != BPF_DW) {
2299                 verbose("invalid BPF_LD_IMM insn\n");
2300                 return -EINVAL;
2301         }
2302         if (insn->off != 0) {
2303                 verbose("BPF_LD_IMM64 uses reserved fields\n");
2304                 return -EINVAL;
2305         }
2306 
2307         err = check_reg_arg(regs, insn->dst_reg, DST_OP);
2308         if (err)
2309                 return err;
2310 
2311         if (insn->src_reg == 0) {
2312                 u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm;
2313 
2314                 regs[insn->dst_reg].type = CONST_IMM;
2315                 regs[insn->dst_reg].imm = imm;
2316                 return 0;
2317         }
2318 
2319         /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */
2320         BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD);
2321 
2322         regs[insn->dst_reg].type = CONST_PTR_TO_MAP;
2323         regs[insn->dst_reg].map_ptr = ld_imm64_to_map_ptr(insn);
2324         return 0;
2325 }
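/* Note: a map reference compiles to a two-insn BPF_LD_IMM64 with
 * src_reg == BPF_PSEUDO_MAP_FD. By the time it reaches here,
 * replace_map_fd_with_map_ptr() has already rewritten the user-supplied
 * map fd in the two imm halves into the kernel's struct bpf_map
 * pointer, which ld_imm64_to_map_ptr() above reassembles.
 */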
2326 
2327 static bool may_access_skb(enum bpf_prog_type type)
2328 {
2329         switch (type) {
2330         case BPF_PROG_TYPE_SOCKET_FILTER:
2331         case BPF_PROG_TYPE_SCHED_CLS:
2332         case BPF_PROG_TYPE_SCHED_ACT:
2333                 return true;
2334         default:
2335                 return false;
2336         }
2337 }
2338 
2339 /* verify safety of LD_ABS|LD_IND instructions:
2340  * - they can only appear in the programs where ctx == skb
2341  * - since they are wrappers of function calls, they scratch R1-R5 registers,
2342  *   preserve R6-R9, and store return value into R0
2343  *
2344  * Implicit input:
2345  *   ctx == skb == R6 == CTX
2346  *
2347  * Explicit input:
2348  *   SRC == any register
2349  *   IMM == 32-bit immediate
2350  *
2351  * Output:
2352  *   R0 - 8/16/32-bit skb data converted to cpu endianness
2353  */
2354 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
2355 {
2356         struct bpf_reg_state *regs = env->cur_state.regs;
2357         u8 mode = BPF_MODE(insn->code);
2358         struct bpf_reg_state *reg;
2359         int i, err;
2360 
2361         if (!may_access_skb(env->prog->type)) {
2362                 verbose("BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
2363                 return -EINVAL;
2364         }
2365 
2366         if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
2367             BPF_SIZE(insn->code) == BPF_DW ||
2368             (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
2369                 verbose("BPF_LD_[ABS|IND] uses reserved fields\n");
2370                 return -EINVAL;
2371         }
2372 
2373         /* check whether implicit source operand (register R6) is readable */
2374         err = check_reg_arg(regs, BPF_REG_6, SRC_OP);
2375         if (err)
2376                 return err;
2377 
2378         if (regs[BPF_REG_6].type != PTR_TO_CTX) {
2379                 verbose("at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
2380                 return -EINVAL;
2381         }
2382 
2383         if (mode == BPF_IND) {
2384                 /* check explicit source operand */
2385                 err = check_reg_arg(regs, insn->src_reg, SRC_OP);
2386                 if (err)
2387                         return err;
2388         }
2389 
2390         /* reset caller saved regs to unreadable */
2391         for (i = 0; i < CALLER_SAVED_REGS; i++) {
2392                 reg = regs + caller_saved[i];
2393                 reg->type = NOT_INIT;
2394                 reg->imm = 0;
2395         }
2396 
2397         /* mark destination R0 register as readable, since it contains
2398          * the value fetched from the packet
2399          */
2400         regs[BPF_REG_0].type = UNKNOWN_VALUE;
2401         return 0;
2402 }
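/* Illustrative sketch: a 2-byte LD_ABS load at fixed packet offset 12,
 *
 *   r0 = *(u16 *)skb[12]       // BPF_LD | BPF_ABS | BPF_H, imm = 12
 *
 * is accepted in a socket filter, but only while r6 still holds the
 * original ctx; after e.g. 'r6 = r0' the same insn is rejected with
 * "at the time of BPF_LD_ABS|IND R6 != pointer to skb".
 */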
2403 
2404 /* non-recursive DFS pseudo code
2405  * 1  procedure DFS-iterative(G,v):
2406  * 2      label v as discovered
2407  * 3      let S be a stack
2408  * 4      S.push(v)
2409  * 5      while S is not empty
2410  * 6            t <- S.pop()
2411  * 7            if t is what we're looking for:
2412  * 8                return t
2413  * 9            for all edges e in G.adjacentEdges(t) do
2414  * 10               if edge e is already labelled
2415  * 11                   continue with the next edge
2416  * 12               w <- G.adjacentVertex(t,e)
2417  * 13               if vertex w is not discovered and not explored
2418  * 14                   label e as tree-edge
2419  * 15                   label w as discovered
2420  * 16                   S.push(w)
2421  * 17                   continue at 5
2422  * 18               else if vertex w is discovered
2423  * 19                   label e as back-edge
2424  * 20               else
2425  * 21                   // vertex w is explored
2426  * 22                   label e as forward- or cross-edge
2427  * 23           label t as explored
2428  * 24           S.pop()
2429  *
2430  * convention:
2431  * 0x10 - discovered
2432  * 0x11 - discovered and fall-through edge labelled
2433  * 0x12 - discovered and fall-through and branch edges labelled
2434  * 0x20 - explored
2435  */
2436 
2437 enum {
2438         DISCOVERED = 0x10,
2439         EXPLORED = 0x20,
2440         FALLTHROUGH = 1,
2441         BRANCH = 2,
2442 };
2443 
2444 #define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
2445 
2446 static int *insn_stack; /* stack of insns to process */
2447 static int cur_stack;   /* current stack index */
2448 static int *insn_state;
2449 
2450 /* t, w, e - match pseudo-code above:
2451  * t - index of current instruction
2452  * w - next instruction
2453  * e - edge
2454  */
2455 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
2456 {
2457         if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
2458                 return 0;
2459 
2460         if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
2461                 return 0;
2462 
2463         if (w < 0 || w >= env->prog->len) {
2464                 verbose("jump out of range from insn %d to %d\n", t, w);
2465                 return -EINVAL;
2466         }
2467 
2468         if (e == BRANCH)
2469                 /* mark branch target for state pruning */
2470                 env->explored_states[w] = STATE_LIST_MARK;
2471 
2472         if (insn_state[w] == 0) {
2473                 /* tree-edge */
2474                 insn_state[t] = DISCOVERED | e;
2475                 insn_state[w] = DISCOVERED;
2476                 if (cur_stack >= env->prog->len)
2477                         return -E2BIG;
2478                 insn_stack[cur_stack++] = w;
2479                 return 1;
2480         } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
2481                 verbose("back-edge from insn %d to %d\n", t, w);
2482                 return -EINVAL;
2483         } else if (insn_state[w] == EXPLORED) {
2484                 /* forward- or cross-edge */
2485                 insn_state[t] = DISCOVERED | e;
2486         } else {
2487                 verbose("insn state internal bug\n");
2488                 return -EFAULT;
2489         }
2490         return 0;
2491 }
2492 
2493 /* non-recursive depth-first-search to detect loops in BPF program
2494  * loop == back-edge in directed graph
2495  */
2496 static int check_cfg(struct bpf_verifier_env *env)
2497 {
2498         struct bpf_insn *insns = env->prog->insnsi;
2499         int insn_cnt = env->prog->len;
2500         int ret = 0;
2501         int i, t;
2502 
2503         insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
2504         if (!insn_state)
2505                 return -ENOMEM;
2506 
2507         insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
2508         if (!insn_stack) {
2509                 kfree(insn_state);
2510                 return -ENOMEM;
2511         }
2512 
2513         insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
2514         insn_stack[0] = 0; /* 0 is the first instruction */
2515         cur_stack = 1;
2516 
2517 peek_stack:
2518         if (cur_stack == 0)
2519                 goto check_state;
2520         t = insn_stack[cur_stack - 1];
2521 
2522         if (BPF_CLASS(insns[t].code) == BPF_JMP) {
2523                 u8 opcode = BPF_OP(insns[t].code);
2524 
2525                 if (opcode == BPF_EXIT) {
2526                         goto mark_explored;
2527                 } else if (opcode == BPF_CALL) {
2528                         ret = push_insn(t, t + 1, FALLTHROUGH, env);
2529                         if (ret == 1)
2530                                 goto peek_stack;
2531                         else if (ret < 0)
2532                                 goto err_free;
2533                         if (t + 1 < insn_cnt)
2534                                 env->explored_states[t + 1] = STATE_LIST_MARK;
2535                 } else if (opcode == BPF_JA) {
2536                         if (BPF_SRC(insns[t].code) != BPF_K) {
2537                                 ret = -EINVAL;
2538                                 goto err_free;
2539                         }
2540                         /* unconditional jump with single edge */
2541                         ret = push_insn(t, t + insns[t].off + 1,
2542                                         FALLTHROUGH, env);
2543                         if (ret == 1)
2544                                 goto peek_stack;
2545                         else if (ret < 0)
2546                                 goto err_free;
2547                         /* tell verifier to check for equivalent states
2548                          * after every call and jump
2549                          */
2550                         if (t + 1 < insn_cnt)
2551                                 env->explored_states[t + 1] = STATE_LIST_MARK;
2552                 } else {
2553                         /* conditional jump with two edges */
2554                         env->explored_states[t] = STATE_LIST_MARK;
2555                         ret = push_insn(t, t + 1, FALLTHROUGH, env);
2556                         if (ret == 1)
2557                                 goto peek_stack;
2558                         else if (ret < 0)
2559                                 goto err_free;
2560 
2561                         ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
2562                         if (ret == 1)
2563                                 goto peek_stack;
2564                         else if (ret < 0)
2565                                 goto err_free;
2566                 }
2567         } else {
2568                 /* all other non-branch instructions with single
2569                  * fall-through edge
2570                  */
2571                 ret = push_insn(t, t + 1, FALLTHROUGH, env);
2572                 if (ret == 1)
2573                         goto peek_stack;
2574                 else if (ret < 0)
2575                         goto err_free;
2576         }
2577 
2578 mark_explored:
2579         insn_state[t] = EXPLORED;
2580         if (cur_stack-- <= 0) {
2581                 verbose("pop stack internal bug\n");
2582                 ret = -EFAULT;
2583                 goto err_free;
2584         }
2585         goto peek_stack;
2586 
2587 check_state:
2588         for (i = 0; i < insn_cnt; i++) {
2589                 if (insn_state[i] != EXPLORED) {
2590                         verbose("unreachable insn %d\n", i);
2591                         ret = -EINVAL;
2592                         goto err_free;
2593                 }
2594         }
2595         ret = 0; /* cfg looks good */
2596 
2597 err_free:
2598         kfree(insn_state);
2599         kfree(insn_stack);
2600         return ret;
2601 }
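/* Example (illustrative only): a program check_cfg() rejects.  Assuming
 * the BPF_MOV64_IMM()/BPF_JMP_IMM()/BPF_EXIT_INSN() helpers from
 * include/linux/filter.h:
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 0),
 *   BPF_JMP_IMM(BPF_JA, 0, 0, -2),   target = 1 + (-2) + 1 = insn 0
 *   BPF_EXIT_INSN(),
 *
 * While insn 1 is being processed, insn 0 still has DISCOVERED set, so
 * push_insn(1, 0, FALLTHROUGH, env) takes the (insn_state[w] & 0xF0) ==
 * DISCOVERED case and the load fails with "back-edge from insn 1 to 0".
 */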
2602 
2603 /* the following conditions reduce the number of explored insns
2604  * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet
2605  */
2606 static bool compare_ptrs_to_packet(struct bpf_reg_state *old,
2607                                    struct bpf_reg_state *cur)
2608 {
2609         if (old->id != cur->id)
2610                 return false;
2611 
2612         /* old ptr_to_packet is more conservative, since it allows smaller
2613          * range. Ex:
2614          * old(off=0,r=10) is equal to cur(off=0,r=20), because
2615          * old(off=0,r=10) means that with range=10 the verifier proceeded
2616          * further and found no issues with the program. Now we're in the same
2617          * spot with cur(off=0,r=20), so we're safe too, since anything further
2618  * will access at most 10 bytes past this pointer.
2619          */
2620         if (old->off == cur->off && old->range < cur->range)
2621                 return true;
2622 
2623  * old(off=20,r=10) is equal to cur(off=22,r=22 or 5 or 0)
2624          * since both cannot be used for packet access and safe(old)
2625          * pointer has smaller off that could be used for further
2626          * 'if (ptr > data_end)' check
2627          * Ex:
2628          * old(off=20,r=10) and cur(off=22,r=22) and cur(off=22,r=0) mean
2629          * that we cannot access the packet.
2630          * The safe range is:
2631          * [ptr, ptr + range - off)
2632  * so whenever off >= range, it means there are no safe bytes from this pointer.
2633  * When old->off <= cur->off, it means that the older code went with a
2634  * smaller offset, and that offset was later used to figure out the
2635  * safe range after the 'if (ptr > data_end)' check
2636          * Say, 'old' state was explored like:
2637          * ... R3(off=0, r=0)
2638          * R4 = R3 + 20
2639          * ... now R4(off=20,r=0)  <-- here
2640          * if (R4 > data_end)
2641          * ... R4(off=20,r=20), R3(off=0,r=20) and R3 can be used to access.
2642          * ... the code further went all the way to bpf_exit.
2643          * Now the 'cur' state at the mark 'here' has R4(off=30,r=0).
2644          * old_R4(off=20,r=0) equal to cur_R4(off=30,r=0), since if the verifier
2645          * goes further, such cur_R4 will give larger safe packet range after
2646          * 'if (R4 > data_end)' and all further insn were already good with r=20,
2647          * so they will be good with r=30 and we can prune the search.
2648          */
2649         if (old->off <= cur->off &&
2650             old->off >= old->range && cur->off >= cur->range)
2651                 return true;
2652 
2653         return false;
2654 }
2655 
2656 /* compare two verifier states
2657  *
2658  * all states stored in state_list are known to be valid, since
2659  * verifier reached 'bpf_exit' instruction through them
2660  *
2661  * this function is called while the verifier explores different branches
2662  * of execution popped from the state stack. If it sees an old state that
2663  * has a more strict register state and a more strict stack state, then this
2664  * execution branch doesn't need to be explored further, since the verifier
2665  * already concluded that the more strict state leads to a valid finish.
2666  *
2667  * Therefore two states are equivalent if the explored register state and
2668  * stack state are both more conservative than the current ones.
2669  * Example:
2670  *       explored                   current
2671  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
2672  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
2673  *
2674  * In other words, if the current stack state (the one being explored) has
2675  * more valid slots than an old one that already passed validation, the
2676  * verifier can stop exploring and conclude that the current state is valid too
2677  *
2678  * Similarly with registers. If the explored state has a register marked
2679  * invalid whereas the same register in the current state is meaningful, the
2680  * current state will reach the 'bpf_exit' instruction safely
2681  */
2682 static bool states_equal(struct bpf_verifier_env *env,
2683                          struct bpf_verifier_state *old,
2684                          struct bpf_verifier_state *cur)
2685 {
2686         bool varlen_map_access = env->varlen_map_value_access;
2687         struct bpf_reg_state *rold, *rcur;
2688         int i;
2689 
2690         for (i = 0; i < MAX_BPF_REG; i++) {
2691                 rold = &old->regs[i];
2692                 rcur = &cur->regs[i];
2693 
2694                 if (memcmp(rold, rcur, sizeof(*rold)) == 0)
2695                         continue;
2696 
2697                 /* If the ranges were not the same, but everything else was and
2698                  * we didn't do a variable access into a map, then we are a-ok.
2699                  */
2700                 if (!varlen_map_access &&
2701                     memcmp(rold, rcur, offsetofend(struct bpf_reg_state, id)) == 0)
2702                         continue;
2703 
2704                 /* If we didn't do a map access, then again we don't care about
2705                  * the mismatched range values, and it's ok if our old type was
2706                  * UNKNOWN and we didn't go to a NOT_INIT'ed reg.
2707                  */
2708                 if (rold->type == NOT_INIT ||
2709                     (!varlen_map_access && rold->type == UNKNOWN_VALUE &&
2710                      rcur->type != NOT_INIT))
2711                         continue;
2712 
2713                 /* Don't care about the reg->id in this case. */
2714                 if (rold->type == PTR_TO_MAP_VALUE_OR_NULL &&
2715                     rcur->type == PTR_TO_MAP_VALUE_OR_NULL &&
2716                     rold->map_ptr == rcur->map_ptr)
2717                         continue;
2718 
2719                 if (rold->type == PTR_TO_PACKET && rcur->type == PTR_TO_PACKET &&
2720                     compare_ptrs_to_packet(rold, rcur))
2721                         continue;
2722 
2723                 return false;
2724         }
2725 
2726         for (i = 0; i < MAX_BPF_STACK; i++) {
2727                 if (old->stack_slot_type[i] == STACK_INVALID)
2728                         continue;
2729                 if (old->stack_slot_type[i] != cur->stack_slot_type[i])
2730                         /* Ex: old explored (safe) state has STACK_SPILL in
2731                          * this stack slot, but current has STACK_MISC ->
2732                          * these verifier states are not equivalent;
2733                          * return false to continue verification of this path
2734                          */
2735                         return false;
2736                 if (i % BPF_REG_SIZE)
2737                         continue;
2738                 if (memcmp(&old->spilled_regs[i / BPF_REG_SIZE],
2739                            &cur->spilled_regs[i / BPF_REG_SIZE],
2740                            sizeof(old->spilled_regs[0])))
2741                         /* when explored and current stack slot types are
2742                          * the same, check that stored pointers types
2743                          * are the same as well.
2744                          * Ex: explored safe path could have stored
2745                          * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8}
2746                          * but current path has stored:
2747                          * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16}
2748                          * such verifier states are not equivalent.
2749                          * return false to continue verification of this path
2750                          */
2751                         return false;
2752                 else
2753                         continue;
2754         }
2755         return true;
2756 }
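/* Illustrative example: with varlen_map_value_access false, an old
 * register of type UNKNOWN_VALUE matches a current PTR_TO_MAP_VALUE in
 * the same slot -- the old walk reached bpf_exit knowing nothing about
 * that register, so the better-informed current walk is safe too.  The
 * reverse (old PTR_TO_MAP_VALUE, current UNKNOWN_VALUE) falls through
 * all of the checks above and returns false, forcing this path to be
 * explored further.
 */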
2757 
2758 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
2759 {
2760         struct bpf_verifier_state_list *new_sl;
2761         struct bpf_verifier_state_list *sl;
2762 
2763         sl = env->explored_states[insn_idx];
2764         if (!sl)
2765                 /* this 'insn_idx' instruction wasn't marked, so we will not
2766                  * be doing state search here
2767                  */
2768                 return 0;
2769 
2770         while (sl != STATE_LIST_MARK) {
2771                 if (states_equal(env, &sl->state, &env->cur_state))
2772                         /* reached equivalent register/stack state,
2773                          * prune the search
2774                          */
2775                         return 1;
2776                 sl = sl->next;
2777         }
2778 
2779         /* there were no equivalent states, remember current one.
2780          * technically the current state is not proven to be safe yet,
2781          * but it will either reach bpf_exit (which means it's safe) or
2782          * it will be rejected. Since there are no loops, we won't be
2783          * seeing this 'insn_idx' instruction again on the way to bpf_exit
2784          */
2785         new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER);
2786         if (!new_sl)
2787                 return -ENOMEM;
2788 
2789         /* add new state to the head of linked list */
2790         memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state));
2791         new_sl->next = env->explored_states[insn_idx];
2792         env->explored_states[insn_idx] = new_sl;
2793         return 0;
2794 }
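/* Illustrative example: if insn 5 is a conditional-jump target,
 * explored_states[5] starts out as STATE_LIST_MARK.  The first path to
 * reach insn 5 adds its verifier state to the list; a second path
 * arriving with an equivalent state (per states_equal()) returns 1, and
 * do_check() logs "5: safe" instead of re-walking everything after
 * insn 5.
 */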
2795 
2796 static int ext_analyzer_insn_hook(struct bpf_verifier_env *env,
2797                                   int insn_idx, int prev_insn_idx)
2798 {
2799         if (!env->analyzer_ops || !env->analyzer_ops->insn_hook)
2800                 return 0;
2801 
2802         return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx);
2803 }
2804 
2805 static int do_check(struct bpf_verifier_env *env)
2806 {
2807         struct bpf_verifier_state *state = &env->cur_state;
2808         struct bpf_insn *insns = env->prog->insnsi;
2809         struct bpf_reg_state *regs = state->regs;
2810         int insn_cnt = env->prog->len;
2811         int insn_idx, prev_insn_idx = 0;
2812         int insn_processed = 0;
2813         bool do_print_state = false;
2814 
2815         init_reg_state(regs);
2816         insn_idx = 0;
2817         env->varlen_map_value_access = false;
2818         for (;;) {
2819                 struct bpf_insn *insn;
2820                 u8 class;
2821                 int err;
2822 
2823                 if (insn_idx >= insn_cnt) {
2824                         verbose("invalid insn idx %d insn_cnt %d\n",
2825                                 insn_idx, insn_cnt);
2826                         return -EFAULT;
2827                 }
2828 
2829                 insn = &insns[insn_idx];
2830                 class = BPF_CLASS(insn->code);
2831 
2832                 if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
2833                         verbose("BPF program is too large. Processed %d insn\n",
2834                                 insn_processed);
2835                         return -E2BIG;
2836                 }
2837 
2838                 err = is_state_visited(env, insn_idx);
2839                 if (err < 0)
2840                         return err;
2841                 if (err == 1) {
2842                         /* found equivalent state, can prune the search */
2843                         if (log_level) {
2844                                 if (do_print_state)
2845                                         verbose("\nfrom %d to %d: safe\n",
2846                                                 prev_insn_idx, insn_idx);
2847                                 else
2848                                         verbose("%d: safe\n", insn_idx);
2849                         }
2850                         goto process_bpf_exit;
2851                 }
2852 
2853                 if (need_resched())
2854                         cond_resched();
2855 
2856                 if (log_level && do_print_state) {
2857                         verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx);
2858                         print_verifier_state(&env->cur_state);
2859                         do_print_state = false;
2860                 }
2861 
2862                 if (log_level) {
2863                         verbose("%d: ", insn_idx);
2864                         print_bpf_insn(env, insn);
2865                 }
2866 
2867                 err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx);
2868                 if (err)
2869                         return err;
2870 
2871                 if (class == BPF_ALU || class == BPF_ALU64) {
2872                         err = check_alu_op(env, insn);
2873                         if (err)
2874                                 return err;
2875 
2876                 } else if (class == BPF_LDX) {
2877                         enum bpf_reg_type *prev_src_type, src_reg_type;
2878 
2879                         /* check for reserved fields is already done */
2880 
2881                         /* check src operand */
2882                         err = check_reg_arg(regs, insn->src_reg, SRC_OP);
2883                         if (err)
2884                                 return err;
2885 
2886                         err = check_reg_arg(regs, insn->dst_reg, DST_OP_NO_MARK);
2887                         if (err)
2888                                 return err;
2889 
2890                         src_reg_type = regs[insn->src_reg].type;
2891 
2892                         /* check that memory (src_reg + off) is readable,
2893                          * the state of dst_reg will be updated by this func
2894                          */
2895                         err = check_mem_access(env, insn->src_reg, insn->off,
2896                                                BPF_SIZE(insn->code), BPF_READ,
2897                                                insn->dst_reg);
2898                         if (err)
2899                                 return err;
2900 
2901                         if (BPF_SIZE(insn->code) != BPF_W &&
2902                             BPF_SIZE(insn->code) != BPF_DW) {
2903                                 insn_idx++;
2904                                 continue;
2905                         }
2906 
2907                         prev_src_type = &env->insn_aux_data[insn_idx].ptr_type;
2908 
2909                         if (*prev_src_type == NOT_INIT) {
2910                                 /* saw a valid insn
2911                                  * dst_reg = *(u32 *)(src_reg + off)
2912                                  * save type to validate intersecting paths
2913                                  */
2914                                 *prev_src_type = src_reg_type;
2915 
2916                         } else if (src_reg_type != *prev_src_type &&
2917                                    (src_reg_type == PTR_TO_CTX ||
2918                                     *prev_src_type == PTR_TO_CTX)) {
2919                                 /* An abusive program is trying to use the same insn
2920                                  * dst_reg = *(u32*) (src_reg + off)
2921                                  * with different pointer types:
2922                                  * src_reg == ctx in one branch and
2923                                  * src_reg == stack|map in some other branch.
2924                                  * Reject it.
2925                                  */
2926                                 verbose("same insn cannot be used with different pointers\n");
2927                                 return -EINVAL;
2928                         }
2929 
2930                 } else if (class == BPF_STX) {
2931                         enum bpf_reg_type *prev_dst_type, dst_reg_type;
2932 
2933                         if (BPF_MODE(insn->code) == BPF_XADD) {
2934                                 err = check_xadd(env, insn);
2935                                 if (err)
2936                                         return err;
2937                                 insn_idx++;
2938                                 continue;
2939                         }
2940 
2941                         /* check src1 operand */
2942                         err = check_reg_arg(regs, insn->src_reg, SRC_OP);
2943                         if (err)
2944                                 return err;
2945                         /* check src2 operand */
2946                         err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
2947                         if (err)
2948                                 return err;
2949 
2950                         dst_reg_type = regs[insn->dst_reg].type;
2951 
2952                         /* check that memory (dst_reg + off) is writeable */
2953                         err = check_mem_access(env, insn->dst_reg, insn->off,
2954                                                BPF_SIZE(insn->code), BPF_WRITE,
2955                                                insn->src_reg);
2956                         if (err)
2957                                 return err;
2958 
2959                         prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type;
2960 
2961                         if (*prev_dst_type == NOT_INIT) {
2962                                 *prev_dst_type = dst_reg_type;
2963                         } else if (dst_reg_type != *prev_dst_type &&
2964                                    (dst_reg_type == PTR_TO_CTX ||
2965                                     *prev_dst_type == PTR_TO_CTX)) {
2966                                 verbose("same insn cannot be used with different pointers\n");
2967                                 return -EINVAL;
2968                         }
2969 
2970                 } else if (class == BPF_ST) {
2971                         if (BPF_MODE(insn->code) != BPF_MEM ||
2972                             insn->src_reg != BPF_REG_0) {
2973                                 verbose("BPF_ST uses reserved fields\n");
2974                                 return -EINVAL;
2975                         }
2976                         /* check src operand */
2977                         err = check_reg_arg(regs, insn->dst_reg, SRC_OP);
2978                         if (err)
2979                                 return err;
2980 
2981                         /* check that memory (dst_reg + off) is writeable */
2982                         err = check_mem_access(env, insn->dst_reg, insn->off,
2983                                                BPF_SIZE(insn->code), BPF_WRITE,
2984                                                -1);
2985                         if (err)
2986                                 return err;
2987 
2988                 } else if (class == BPF_JMP) {
2989                         u8 opcode = BPF_OP(insn->code);
2990 
2991                         if (opcode == BPF_CALL) {
2992                                 if (BPF_SRC(insn->code) != BPF_K ||
2993                                     insn->off != 0 ||
2994                                     insn->src_reg != BPF_REG_0 ||
2995                                     insn->dst_reg != BPF_REG_0) {
2996                                         verbose("BPF_CALL uses reserved fields\n");
2997                                         return -EINVAL;
2998                                 }
2999 
3000                                 err = check_call(env, insn->imm);
3001                                 if (err)
3002                                         return err;
3003 
3004                         } else if (opcode == BPF_JA) {
3005                                 if (BPF_SRC(insn->code) != BPF_K ||
3006                                     insn->imm != 0 ||
3007                                     insn->src_reg != BPF_REG_0 ||
3008                                     insn->dst_reg != BPF_REG_0) {
3009                                         verbose("BPF_JA uses reserved fields\n");
3010                                         return -EINVAL;
3011                                 }
3012 
3013                                 insn_idx += insn->off + 1;
3014                                 continue;
3015 
3016                         } else if (opcode == BPF_EXIT) {
3017                                 if (BPF_SRC(insn->code) != BPF_K ||
3018                                     insn->imm != 0 ||
3019                                     insn->src_reg != BPF_REG_0 ||
3020                                     insn->dst_reg != BPF_REG_0) {
3021                                         verbose("BPF_EXIT uses reserved fields\n");
3022                                         return -EINVAL;
3023                                 }
3024 
3025                                 /* eBPF calling convention is such that R0 is used
3026                                  * to return the value from eBPF program.
3027                                  * Make sure that it's readable at this time
3028                                  * of bpf_exit, which means that program wrote
3029                                  * something into it earlier
3030                                  */
3031                                 err = check_reg_arg(regs, BPF_REG_0, SRC_OP);
3032                                 if (err)
3033                                         return err;
3034 
3035                                 if (is_pointer_value(env, BPF_REG_0)) {
3036                                         verbose("R0 leaks addr as return value\n");
3037                                         return -EACCES;
3038                                 }
3039 
3040 process_bpf_exit:
3041                                 insn_idx = pop_stack(env, &prev_insn_idx);
3042                                 if (insn_idx < 0) {
3043                                         break;
3044                                 } else {
3045                                         do_print_state = true;
3046                                         continue;
3047                                 }
3048                         } else {
3049                                 err = check_cond_jmp_op(env, insn, &insn_idx);
3050                                 if (err)
3051                                         return err;
3052                         }
3053                 } else if (class == BPF_LD) {
3054                         u8 mode = BPF_MODE(insn->code);
3055 
3056                         if (mode == BPF_ABS || mode == BPF_IND) {
3057                                 err = check_ld_abs(env, insn);
3058                                 if (err)
3059                                         return err;
3060 
3061                         } else if (mode == BPF_IMM) {
3062                                 err = check_ld_imm(env, insn);
3063                                 if (err)
3064                                         return err;
3065 
3066                                 insn_idx++;
3067                         } else {
3068                                 verbose("invalid BPF_LD mode\n");
3069                                 return -EINVAL;
3070                         }
3071                         reset_reg_range_values(regs, insn->dst_reg);
3072                 } else {
3073                         verbose("unknown insn class %d\n", class);
3074                         return -EINVAL;
3075                 }
3076 
3077                 insn_idx++;
3078         }
3079 
3080         verbose("processed %d insns\n", insn_processed);
3081         return 0;
3082 }
3083 
3084 static int check_map_prog_compatibility(struct bpf_map *map,
3085                                         struct bpf_prog *prog)
3087 {
3088         if (prog->type == BPF_PROG_TYPE_PERF_EVENT &&
3089             (map->map_type == BPF_MAP_TYPE_HASH ||
3090              map->map_type == BPF_MAP_TYPE_PERCPU_HASH) &&
3091             (map->map_flags & BPF_F_NO_PREALLOC)) {
3092                 verbose("perf_event programs can only use preallocated hash map\n");
3093                 return -EINVAL;
3094         }
3095         return 0;
3096 }
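/* Illustrative user-space sketch of the rejected combination, assuming
 * the bpf_create_map() wrapper from tools/lib/bpf (names and wrappers
 * are for illustration only):
 */
#if 0
        int map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(__u32),
                                    sizeof(__u64), 64, BPF_F_NO_PREALLOC);

        /* Loading a BPF_PROG_TYPE_PERF_EVENT program whose instructions
         * reference map_fd now fails with -EINVAL and the log message
         * "perf_event programs can only use preallocated hash map".
         */
#endif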
3097 
3098 /* look for pseudo eBPF instructions that access map FDs and
3099  * replace them with actual map pointers
3100  */
3101 static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env)
3102 {
3103         struct bpf_insn *insn = env->prog->insnsi;
3104         int insn_cnt = env->prog->len;
3105         int i, j, err;
3106 
3107         err = bpf_prog_calc_tag(env->prog);
3108         if (err)
3109                 return err;
3110 
3111         for (i = 0; i < insn_cnt; i++, insn++) {
3112                 if (BPF_CLASS(insn->code) == BPF_LDX &&
3113                     (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
3114                         verbose("BPF_LDX uses reserved fields\n");
3115                         return -EINVAL;
3116                 }
3117 
3118                 if (BPF_CLASS(insn->code) == BPF_STX &&
3119                     ((BPF_MODE(insn->code) != BPF_MEM &&
3120                       BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
3121                         verbose("BPF_STX uses reserved fields\n");
3122                         return -EINVAL;
3123                 }
3124 
3125                 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
3126                         struct bpf_map *map;
3127                         struct fd f;
3128 
3129                         if (i == insn_cnt - 1 || insn[1].code != 0 ||
3130                             insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
3131                             insn[1].off != 0) {
3132                                 verbose("invalid bpf_ld_imm64 insn\n");
3133                                 return -EINVAL;
3134                         }
3135 
3136                         if (insn->src_reg == 0)
3137                                 /* valid generic load 64-bit imm */
3138                                 goto next_insn;
3139 
3140                         if (insn->src_reg != BPF_PSEUDO_MAP_FD) {
3141                                 verbose("unrecognized bpf_ld_imm64 insn\n");
3142                                 return -EINVAL;
3143                         }
3144 
3145                         f = fdget(insn->imm);
3146                         map = __bpf_map_get(f);
3147                         if (IS_ERR(map)) {
3148                                 verbose("fd %d is not pointing to valid bpf_map\n",
3149                                         insn->imm);
3150                                 return PTR_ERR(map);
3151                         }
3152 
3153                         err = check_map_prog_compatibility(map, env->prog);
3154                         if (err) {
3155                                 fdput(f);
3156                                 return err;
3157                         }
3158 
3159                         /* store map pointer inside BPF_LD_IMM64 instruction */
3160                         insn[0].imm = (u32) (unsigned long) map;
3161                         insn[1].imm = ((u64) (unsigned long) map) >> 32;
3162 
3163                         /* check whether we recorded this map already */
3164                         for (j = 0; j < env->used_map_cnt; j++)
3165                                 if (env->used_maps[j] == map) {
3166                                         fdput(f);
3167                                         goto next_insn;
3168                                 }
3169 
3170                         if (env->used_map_cnt >= MAX_USED_MAPS) {
3171                                 fdput(f);
3172                                 return -E2BIG;
3173                         }
3174 
3175                         /* hold the map. If the program is rejected by verifier,
3176                          * the map will be released by release_maps() or it
3177                          * will be used by the valid program until it's unloaded
3178                          * and all maps are released in free_bpf_prog_info()
3179                          */
3180                         map = bpf_map_inc(map, false);
3181                         if (IS_ERR(map)) {
3182                                 fdput(f);
3183                                 return PTR_ERR(map);
3184                         }
3185                         env->used_maps[env->used_map_cnt++] = map;
3186 
3187                         fdput(f);
3188 next_insn:
3189                         insn++;
3190                         i++;
3191                 }
3192         }
3193 
3194         /* now all pseudo BPF_LD_IMM64 instructions load valid
3195          * 'struct bpf_map *' into a register instead of user map_fd.
3196          * These pointers will be used later by verifier to validate map access.
3197          */
3198         return 0;
3199 }
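/* Example (illustrative only): user space emits the fd-carrying pair,
 * typically via a BPF_LD_MAP_FD()-style helper as used in samples/bpf:
 *
 *   insn[0]: code = BPF_LD | BPF_DW | BPF_IMM,
 *            src_reg = BPF_PSEUDO_MAP_FD, imm = map_fd
 *   insn[1]: code = 0, imm = 0    (second half of the 16-byte insn)
 *
 * After this pass insn[0].imm holds the low 32 bits of the kernel
 * 'struct bpf_map *' and insn[1].imm holds the high 32 bits.
 */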
3200 
3201 /* drop refcnt of maps used by the rejected program */
3202 static void release_maps(struct bpf_verifier_env *env)
3203 {
3204         int i;
3205 
3206         for (i = 0; i < env->used_map_cnt; i++)
3207                 bpf_map_put(env->used_maps[i]);
3208 }
3209 
3210 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
3211 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
3212 {
3213         struct bpf_insn *insn = env->prog->insnsi;
3214         int insn_cnt = env->prog->len;
3215         int i;
3216 
3217         for (i = 0; i < insn_cnt; i++, insn++)
3218                 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW))
3219                         insn->src_reg = 0;
3220 }
3221 
3222 /* convert load instructions that access fields of 'struct __sk_buff'
3223  * into sequence of instructions that access fields of 'struct sk_buff'
3224  */
3225 static int convert_ctx_accesses(struct bpf_verifier_env *env)
3226 {
3227         const struct bpf_verifier_ops *ops = env->prog->aux->ops;
3228         const int insn_cnt = env->prog->len;
3229         struct bpf_insn insn_buf[16], *insn;
3230         struct bpf_prog *new_prog;
3231         enum bpf_access_type type;
3232         int i, cnt, delta = 0;
3233 
3234         if (ops->gen_prologue) {
3235                 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
3236                                         env->prog);
3237                 if (cnt >= ARRAY_SIZE(insn_buf)) {
3238                         verbose("bpf verifier is misconfigured\n");
3239                         return -EINVAL;
3240                 } else if (cnt) {
3241                         new_prog = bpf_patch_insn_single(env->prog, 0,
3242                                                          insn_buf, cnt);
3243                         if (!new_prog)
3244                                 return -ENOMEM;
3245                         env->prog = new_prog;
3246                         delta += cnt - 1;
3247                 }
3248         }
3249 
3250         if (!ops->convert_ctx_access)
3251                 return 0;
3252 
3253         insn = env->prog->insnsi + delta;
3254 
3255         for (i = 0; i < insn_cnt; i++, insn++) {
3256                 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) ||
3257                     insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
3258                     insn->code == (BPF_LDX | BPF_MEM | BPF_W) ||
3259                     insn->code == (BPF_LDX | BPF_MEM | BPF_DW))
3260                         type = BPF_READ;
3261                 else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) ||
3262                          insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
3263                          insn->code == (BPF_STX | BPF_MEM | BPF_W) ||
3264                          insn->code == (BPF_STX | BPF_MEM | BPF_DW))
3265                         type = BPF_WRITE;
3266                 else
3267                         continue;
3268 
3269                 if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX)
3270                         continue;
3271 
3272                 cnt = ops->convert_ctx_access(type, insn, insn_buf, env->prog);
3273                 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
3274                         verbose("bpf verifier is misconfigured\n");
3275                         return -EINVAL;
3276                 }
3277 
3278                 new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf,
3279                                                  cnt);
3280                 if (!new_prog)
3281                         return -ENOMEM;
3282 
3283                 delta += cnt - 1;
3284 
3285                 /* keep walking new program and skip insns we just inserted */
3286                 env->prog = new_prog;
3287                 insn      = new_prog->insnsi + i + delta;
3288         }
3289 
3290         return 0;
3291 }
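/* Example (illustrative only): for a socket filter, a context load like
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten by the program type's convert_ctx_access() callback into
 * an equivalent load from the real 'struct sk_buff', roughly
 *
 *   r0 = *(u32 *)(r1 + offsetof(struct sk_buff, len))
 *
 * bpf_patch_insn_single() splices in however many insns the callback
 * produced, and 'delta' keeps later instruction indexes in step.
 */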
3292 
3293 static void free_states(struct bpf_verifier_env *env)
3294 {
3295         struct bpf_verifier_state_list *sl, *sln;
3296         int i;
3297 
3298         if (!env->explored_states)
3299                 return;
3300 
3301         for (i = 0; i < env->prog->len; i++) {
3302                 sl = env->explored_states[i];
3303 
3304                 if (sl)
3305                         while (sl != STATE_LIST_MARK) {
3306                                 sln = sl->next;
3307                                 kfree(sl);
3308                                 sl = sln;
3309                         }
3310         }
3311 
3312         kfree(env->explored_states);
3313 }
3314 
3315 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
3316 {
3317         char __user *log_ubuf = NULL;
3318         struct bpf_verifier_env *env;
3319         int ret = -EINVAL;
3320 
3321         /* 'struct bpf_verifier_env' can be global, but since it's not small,
3322          * allocate/free it every time bpf_check() is called
3323          */
3324         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
3325         if (!env)
3326                 return -ENOMEM;
3327 
3328         env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
3329                                      (*prog)->len);
3330         ret = -ENOMEM;
3331         if (!env->insn_aux_data)
3332                 goto err_free_env;
3333         env->prog = *prog;
3334 
3335         /* grab the mutex to protect few globals used by verifier */
3336         mutex_lock(&bpf_verifier_lock);
3337 
3338         if (attr->log_level || attr->log_buf || attr->log_size) {
3339                 /* user requested verbose verifier output
3340                  * and supplied buffer to store the verification trace
3341                  */
3342                 log_level = attr->log_level;
3343                 log_ubuf = (char __user *) (unsigned long) attr->log_buf;
3344                 log_size = attr->log_size;
3345                 log_len = 0;
3346 
3347                 ret = -EINVAL;
3348                 /* log_* values have to be sane */
3349                 if (log_size < 128 || log_size > UINT_MAX >> 8 ||
3350                     log_level == 0 || log_ubuf == NULL)
3351                         goto err_unlock;
3352 
3353                 ret = -ENOMEM;
3354                 log_buf = vmalloc(log_size);
3355                 if (!log_buf)
3356                         goto err_unlock;
3357         } else {
3358                 log_level = 0;
3359         }
3360 
3361         ret = replace_map_fd_with_map_ptr(env);
3362         if (ret < 0)
3363                 goto skip_full_check;
3364 
3365         env->explored_states = kcalloc(env->prog->len,
3366                                        sizeof(struct bpf_verifier_state_list *),
3367                                        GFP_USER);
3368         ret = -ENOMEM;
3369         if (!env->explored_states)
3370                 goto skip_full_check;
3371 
3372         ret = check_cfg(env);
3373         if (ret < 0)
3374                 goto skip_full_check;
3375 
3376         env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
3377 
3378         ret = do_check(env);
3379 
3380 skip_full_check:
3381         while (pop_stack(env, NULL) >= 0);
3382         free_states(env);
3383 
3384         if (ret == 0)
3385                 /* program is valid, convert *(u32*)(ctx + off) accesses */
3386                 ret = convert_ctx_accesses(env);
3387 
3388         if (log_level && log_len >= log_size - 1) {
3389                 BUG_ON(log_len >= log_size);
3390                 /* verifier log exceeded user supplied buffer */
3391                 ret = -ENOSPC;
3392                 /* fall through to return what was recorded */
3393         }
3394 
3395         /* copy verifier log back to user space including trailing zero */
3396         if (log_level && copy_to_user(log_ubuf, log_buf, log_len + 1) != 0) {
3397                 ret = -EFAULT;
3398                 goto free_log_buf;
3399         }
3400 
3401         if (ret == 0 && env->used_map_cnt) {
3402                 /* if program passed verifier, update used_maps in bpf_prog_info */
3403                 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
3404                                                           sizeof(env->used_maps[0]),
3405                                                           GFP_KERNEL);
3406 
3407                 if (!env->prog->aux->used_maps) {
3408                         ret = -ENOMEM;
3409                         goto free_log_buf;
3410                 }
3411 
3412                 memcpy(env->prog->aux->used_maps, env->used_maps,
3413                        sizeof(env->used_maps[0]) * env->used_map_cnt);
3414                 env->prog->aux->used_map_cnt = env->used_map_cnt;
3415 
3416                 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
3417                  * bpf_ld_imm64 instructions
3418                  */
3419                 convert_pseudo_ld_imm64(env);
3420         }
3421 
3422 free_log_buf:
3423         if (log_level)
3424                 vfree(log_buf);
3425         if (!env->prog->aux->used_maps)
3426                 /* if we didn't copy map pointers into bpf_prog_info, release
3427                  * them now. Otherwise free_bpf_prog_info() will release them.
3428                  */
3429                 release_maps(env);
3430         *prog = env->prog;
3431 err_unlock:
3432         mutex_unlock(&bpf_verifier_lock);
3433         vfree(env->insn_aux_data);
3434 err_free_env:
3435         kfree(env);
3436         return ret;
3437 }
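/* Illustrative user-space sketch (field names follow union bpf_attr;
 * program setup is elided): requesting the verifier log at load time.
 * Per the checks above, log_level must be non-zero and log_size at
 * least 128 bytes, or BPF_PROG_LOAD fails with -EINVAL.
 */
#if 0
        char vlog[65536];
        union bpf_attr attr = {};

        attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
        attr.insns     = (__u64)(unsigned long)insns;
        attr.insn_cnt  = insn_cnt;
        attr.license   = (__u64)(unsigned long)"GPL";
        attr.log_buf   = (__u64)(unsigned long)vlog;
        attr.log_size  = sizeof(vlog);
        attr.log_level = 1;

        prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
#endif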
3438 
3439 int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops,
3440                  void *priv)
3441 {
3442         struct bpf_verifier_env *env;
3443         int ret;
3444 
3445         env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
3446         if (!env)
3447                 return -ENOMEM;
3448 
3449         env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) *
3450                                      prog->len);
3451         ret = -ENOMEM;
3452         if (!env->insn_aux_data)
3453                 goto err_free_env;
3454         env->prog = prog;
3455         env->analyzer_ops = ops;
3456         env->analyzer_priv = priv;
3457 
3458         /* grab the mutex to protect few globals used by verifier */
3459         mutex_lock(&bpf_verifier_lock);
3460 
3461         log_level = 0;
3462 
3463         env->explored_states = kcalloc(env->prog->len,
3464                                        sizeof(struct bpf_verifier_state_list *),
3465                                        GFP_KERNEL);
3466         ret = -ENOMEM;
3467         if (!env->explored_states)
3468                 goto skip_full_check;
3469 
3470         ret = check_cfg(env);
3471         if (ret < 0)
3472                 goto skip_full_check;
3473 
3474         env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
3475 
3476         ret = do_check(env);
3477 
3478 skip_full_check:
3479         while (pop_stack(env, NULL) >= 0);
3480         free_states(env);
3481 
3482         mutex_unlock(&bpf_verifier_lock);
3483         vfree(env->insn_aux_data);
3484 err_free_env:
3485         kfree(env);
3486         return ret;
3487 }
3488 EXPORT_SYMBOL_GPL(bpf_analyzer);
3489 
