TOMOYO Linux Cross Reference
Linux/net/core/filter.c

  1 /*
  2  * Linux Socket Filter - Kernel level socket filtering
  3  *
  4  * Based on the design of the Berkeley Packet Filter. The new
  5  * internal format has been designed by PLUMgrid:
  6  *
  7  *      Copyright (c) 2011 - 2014 PLUMgrid, http://plumgrid.com
  8  *
  9  * Authors:
 10  *
 11  *      Jay Schulist <jschlst@samba.org>
 12  *      Alexei Starovoitov <ast@plumgrid.com>
 13  *      Daniel Borkmann <dborkman@redhat.com>
 14  *
 15  * This program is free software; you can redistribute it and/or
 16  * modify it under the terms of the GNU General Public License
 17  * as published by the Free Software Foundation; either version
 18  * 2 of the License, or (at your option) any later version.
 19  *
 20  * Andi Kleen - Fix a few bad bugs and races.
 21  * Kris Katterjohn - Added many additional checks in bpf_check_classic()
 22  */
 23 
 24 #include <linux/module.h>
 25 #include <linux/types.h>
 26 #include <linux/mm.h>
 27 #include <linux/fcntl.h>
 28 #include <linux/socket.h>
 29 #include <linux/sock_diag.h>
 30 #include <linux/in.h>
 31 #include <linux/inet.h>
 32 #include <linux/netdevice.h>
 33 #include <linux/if_packet.h>
 34 #include <linux/if_arp.h>
 35 #include <linux/gfp.h>
 36 #include <net/inet_common.h>
 37 #include <net/ip.h>
 38 #include <net/protocol.h>
 39 #include <net/netlink.h>
 40 #include <linux/skbuff.h>
 41 #include <net/sock.h>
 42 #include <net/flow_dissector.h>
 43 #include <linux/errno.h>
 44 #include <linux/timer.h>
 45 #include <linux/uaccess.h>
 46 #include <asm/unaligned.h>
 47 #include <asm/cmpxchg.h>
 48 #include <linux/filter.h>
 49 #include <linux/ratelimit.h>
 50 #include <linux/seccomp.h>
 51 #include <linux/if_vlan.h>
 52 #include <linux/bpf.h>
 53 #include <net/sch_generic.h>
 54 #include <net/cls_cgroup.h>
 55 #include <net/dst_metadata.h>
 56 #include <net/dst.h>
 57 #include <net/sock_reuseport.h>
 58 #include <net/busy_poll.h>
 59 #include <net/tcp.h>
 60 #include <linux/bpf_trace.h>
 61 
 62 /**
 63  *      sk_filter_trim_cap - run a packet through a socket filter
 64  *      @sk: sock associated with &sk_buff
 65  *      @skb: buffer to filter
 66  *      @cap: limit on how short the eBPF program may trim the packet
 67  *
 68  * Run the eBPF program and then cut skb->data to the correct size returned
 69  * by the program. If pkt_len is 0 we toss the packet. If skb->len is
 70  * smaller than pkt_len we keep the whole skb->data. This is the socket-level
 71  * wrapper to BPF_PROG_RUN. It returns 0 if the packet should be accepted
 72  * or -EPERM if the packet should be tossed.
 73  *
 74  */
 75 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
 76 {
 77         int err;
 78         struct sk_filter *filter;
 79 
 80         /*
 81          * If the skb was allocated from pfmemalloc reserves, only
 82          * allow SOCK_MEMALLOC sockets to use it as this socket is
 83          * helping free memory
 84          */
 85         if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) {
 86                 NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP);
 87                 return -ENOMEM;
 88         }
 89         err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb);
 90         if (err)
 91                 return err;
 92 
 93         err = security_sock_rcv_skb(sk, skb);
 94         if (err)
 95                 return err;
 96 
 97         rcu_read_lock();
 98         filter = rcu_dereference(sk->sk_filter);
 99         if (filter) {
100                 struct sock *save_sk = skb->sk;
101                 unsigned int pkt_len;
102 
103                 skb->sk = sk;
104                 pkt_len = bpf_prog_run_save_cb(filter->prog, skb);
105                 skb->sk = save_sk;
106                 err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
107         }
108         rcu_read_unlock();
109 
110         return err;
111 }
112 EXPORT_SYMBOL(sk_filter_trim_cap);
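
Callers in protocol receive paths normally reach this function through the sk_filter() wrapper in <linux/filter.h>, which passes cap = 1 so an accepted packet is never trimmed below one byte. A minimal, purely illustrative caller (example_rcv() is hypothetical, not a kernel function):

/* Illustrative sketch only -- not part of filter.c. */
static int example_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* sk_filter(sk, skb) == sk_filter_trim_cap(sk, skb, 1) */
	if (sk_filter(sk, skb)) {
		/* Filter (or cgroup/LSM hook) said drop. */
		kfree_skb(skb);
		return NET_RX_DROP;
	}
	/* skb->len may have been trimmed by the filter at this point. */
	return NET_RX_SUCCESS;
}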
113 
114 BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb)
115 {
116         return skb_get_poff(skb);
117 }
118 
119 BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x)
120 {
121         struct nlattr *nla;
122 
123         if (skb_is_nonlinear(skb))
124                 return 0;
125 
126         if (skb->len < sizeof(struct nlattr))
127                 return 0;
128 
129         if (a > skb->len - sizeof(struct nlattr))
130                 return 0;
131 
132         nla = nla_find((struct nlattr *) &skb->data[a], skb->len - a, x);
133         if (nla)
134                 return (void *) nla - (void *) skb->data;
135 
136         return 0;
137 }
138 
139 BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x)
140 {
141         struct nlattr *nla;
142 
143         if (skb_is_nonlinear(skb))
144                 return 0;
145 
146         if (skb->len < sizeof(struct nlattr))
147                 return 0;
148 
149         if (a > skb->len - sizeof(struct nlattr))
150                 return 0;
151 
152         nla = (struct nlattr *) &skb->data[a];
153         if (nla->nla_len > skb->len - a)
154                 return 0;
155 
156         nla = nla_find_nested(nla, x);
157         if (nla)
158                 return (void *) nla - (void *) skb->data;
159 
160         return 0;
161 }
162 
163 BPF_CALL_0(__get_raw_cpu_id)
164 {
165         return raw_smp_processor_id();
166 }
167 
168 static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
169         .func           = __get_raw_cpu_id,
170         .gpl_only       = false,
171         .ret_type       = RET_INTEGER,
172 };
173 
174 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
175                               struct bpf_insn *insn_buf)
176 {
177         struct bpf_insn *insn = insn_buf;
178 
179         switch (skb_field) {
180         case SKF_AD_MARK:
181                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
182 
183                 *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
184                                       offsetof(struct sk_buff, mark));
185                 break;
186 
187         case SKF_AD_PKTTYPE:
188                 *insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
189                 *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
190 #ifdef __BIG_ENDIAN_BITFIELD
191                 *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
192 #endif
193                 break;
194 
195         case SKF_AD_QUEUE:
196                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
197 
198                 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
199                                       offsetof(struct sk_buff, queue_mapping));
200                 break;
201 
202         case SKF_AD_VLAN_TAG:
203         case SKF_AD_VLAN_TAG_PRESENT:
204                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
205                 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
206 
207                 /* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
208                 *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
209                                       offsetof(struct sk_buff, vlan_tci));
210                 if (skb_field == SKF_AD_VLAN_TAG) {
211                         *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
212                                                 ~VLAN_TAG_PRESENT);
213                 } else {
214                         /* dst_reg >>= 12 */
215                         *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
216                         /* dst_reg &= 1 */
217                         *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
218                 }
219                 break;
220         }
221 
222         return insn - insn_buf;
223 }
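
As a concrete illustration (the array name is made up), the SKF_AD_VLAN_TAG_PRESENT case above expands to the following three instructions when called with BPF_REG_A as dst_reg and BPF_REG_CTX as src_reg:

/* Illustrative sketch only -- roughly what convert_skb_access() emits for
 * skb_field == SKF_AD_VLAN_TAG_PRESENT into the caller's insn buffer.
 */
struct bpf_insn vlan_present_expansion[] = {
	/* A = *(u16 *) (CTX + offsetof(struct sk_buff, vlan_tci)) */
	BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
		    offsetof(struct sk_buff, vlan_tci)),
	/* A >>= 12; A &= 1  ->  0 or 1 (the VLAN_TAG_PRESENT bit) */
	BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12),
	BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1),
};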
224 
225 static bool convert_bpf_extensions(struct sock_filter *fp,
226                                    struct bpf_insn **insnp)
227 {
228         struct bpf_insn *insn = *insnp;
229         u32 cnt;
230 
231         switch (fp->k) {
232         case SKF_AD_OFF + SKF_AD_PROTOCOL:
233                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
234 
235                 /* A = *(u16 *) (CTX + offsetof(protocol)) */
236                 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
237                                       offsetof(struct sk_buff, protocol));
238                 /* A = ntohs(A) [emitting a nop or swap16] */
239                 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
240                 break;
241 
242         case SKF_AD_OFF + SKF_AD_PKTTYPE:
243                 cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
244                 insn += cnt - 1;
245                 break;
246 
247         case SKF_AD_OFF + SKF_AD_IFINDEX:
248         case SKF_AD_OFF + SKF_AD_HATYPE:
249                 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
250                 BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2);
251 
252                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
253                                       BPF_REG_TMP, BPF_REG_CTX,
254                                       offsetof(struct sk_buff, dev));
255                 /* if (tmp != 0) goto pc + 1 */
256                 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_TMP, 0, 1);
257                 *insn++ = BPF_EXIT_INSN();
258                 if (fp->k == SKF_AD_OFF + SKF_AD_IFINDEX)
259                         *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_TMP,
260                                             offsetof(struct net_device, ifindex));
261                 else
262                         *insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_TMP,
263                                             offsetof(struct net_device, type));
264                 break;
265 
266         case SKF_AD_OFF + SKF_AD_MARK:
267                 cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
268                 insn += cnt - 1;
269                 break;
270 
271         case SKF_AD_OFF + SKF_AD_RXHASH:
272                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4);
273 
274                 *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
275                                     offsetof(struct sk_buff, hash));
276                 break;
277 
278         case SKF_AD_OFF + SKF_AD_QUEUE:
279                 cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
280                 insn += cnt - 1;
281                 break;
282 
283         case SKF_AD_OFF + SKF_AD_VLAN_TAG:
284                 cnt = convert_skb_access(SKF_AD_VLAN_TAG,
285                                          BPF_REG_A, BPF_REG_CTX, insn);
286                 insn += cnt - 1;
287                 break;
288 
289         case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
290                 cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
291                                          BPF_REG_A, BPF_REG_CTX, insn);
292                 insn += cnt - 1;
293                 break;
294 
295         case SKF_AD_OFF + SKF_AD_VLAN_TPID:
296                 BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
297 
298                 /* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
299                 *insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
300                                       offsetof(struct sk_buff, vlan_proto));
301                 /* A = ntohs(A) [emitting a nop or swap16] */
302                 *insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
303                 break;
304 
305         case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
306         case SKF_AD_OFF + SKF_AD_NLATTR:
307         case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
308         case SKF_AD_OFF + SKF_AD_CPU:
309         case SKF_AD_OFF + SKF_AD_RANDOM:
310                 /* arg1 = CTX */
311                 *insn++ = BPF_MOV64_REG(BPF_REG_ARG1, BPF_REG_CTX);
312                 /* arg2 = A */
313                 *insn++ = BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_A);
314                 /* arg3 = X */
315                 *insn++ = BPF_MOV64_REG(BPF_REG_ARG3, BPF_REG_X);
316                 /* Emit call(arg1=CTX, arg2=A, arg3=X) */
317                 switch (fp->k) {
318                 case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
319                         *insn = BPF_EMIT_CALL(__skb_get_pay_offset);
320                         break;
321                 case SKF_AD_OFF + SKF_AD_NLATTR:
322                         *insn = BPF_EMIT_CALL(__skb_get_nlattr);
323                         break;
324                 case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
325                         *insn = BPF_EMIT_CALL(__skb_get_nlattr_nest);
326                         break;
327                 case SKF_AD_OFF + SKF_AD_CPU:
328                         *insn = BPF_EMIT_CALL(__get_raw_cpu_id);
329                         break;
330                 case SKF_AD_OFF + SKF_AD_RANDOM:
331                         *insn = BPF_EMIT_CALL(bpf_user_rnd_u32);
332                         bpf_user_rnd_init_once();
333                         break;
334                 }
335                 break;
336 
337         case SKF_AD_OFF + SKF_AD_ALU_XOR_X:
338                 /* A ^= X */
339                 *insn = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_X);
340                 break;
341 
342         default:
343                 /* This is just a dummy call to avoid letting the compiler
344                  * evict __bpf_call_base() as an optimization. Placed here
345                  * where no-one bothers.
346                  */
347                 BUG_ON(__bpf_call_base(0, 0, 0, 0, 0) != 0);
348                 return false;
349         }
350 
351         *insnp = insn;
352         return true;
353 }
354 
355 /**
356  *      bpf_convert_filter - convert filter program
357  *      @prog: the user passed filter program
358  *      @len: the length of the user passed filter program
359  *      @new_prog: allocated 'struct bpf_prog' or NULL
360  *      @new_len: pointer to store length of converted program
361  *
362  * Remap 'sock_filter' style classic BPF (cBPF) instruction set to 'bpf_insn'
363  * style extended BPF (eBPF).
364  * Conversion workflow:
365  *
366  * 1) First pass for calculating the new program length:
367  *   bpf_convert_filter(old_prog, old_len, NULL, &new_len)
368  *
 369  * 2) 2nd call to remap, itself done in two passes: the 1st pass finds the
 370  *    new jump offsets, the 2nd pass does the remapping:
371  *   bpf_convert_filter(old_prog, old_len, new_prog, &new_len);
372  */
373 static int bpf_convert_filter(struct sock_filter *prog, int len,
374                               struct bpf_prog *new_prog, int *new_len)
375 {
376         int new_flen = 0, pass = 0, target, i, stack_off;
377         struct bpf_insn *new_insn, *first_insn = NULL;
378         struct sock_filter *fp;
379         int *addrs = NULL;
380         u8 bpf_src;
381 
382         BUILD_BUG_ON(BPF_MEMWORDS * sizeof(u32) > MAX_BPF_STACK);
383         BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
384 
385         if (len <= 0 || len > BPF_MAXINSNS)
386                 return -EINVAL;
387 
388         if (new_prog) {
389                 first_insn = new_prog->insnsi;
390                 addrs = kcalloc(len, sizeof(*addrs),
391                                 GFP_KERNEL | __GFP_NOWARN);
392                 if (!addrs)
393                         return -ENOMEM;
394         }
395 
396 do_pass:
397         new_insn = first_insn;
398         fp = prog;
399 
400         /* Classic BPF related prologue emission. */
401         if (new_prog) {
402                 /* Classic BPF expects A and X to be reset first. These need
403                  * to be guaranteed to be the first two instructions.
404                  */
405                 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
406                 *new_insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X);
407 
408                 /* All programs must keep CTX in callee saved BPF_REG_CTX.
 409          * In the eBPF case it's done by the compiler; here we need to
 410          * do this ourselves. Initial CTX is present in BPF_REG_ARG1.
411                  */
412                 *new_insn++ = BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1);
413         } else {
414                 new_insn += 3;
415         }
416 
417         for (i = 0; i < len; fp++, i++) {
418                 struct bpf_insn tmp_insns[6] = { };
419                 struct bpf_insn *insn = tmp_insns;
420 
421                 if (addrs)
422                         addrs[i] = new_insn - first_insn;
423 
424                 switch (fp->code) {
425                 /* All arithmetic insns and skb loads map as-is. */
426                 case BPF_ALU | BPF_ADD | BPF_X:
427                 case BPF_ALU | BPF_ADD | BPF_K:
428                 case BPF_ALU | BPF_SUB | BPF_X:
429                 case BPF_ALU | BPF_SUB | BPF_K:
430                 case BPF_ALU | BPF_AND | BPF_X:
431                 case BPF_ALU | BPF_AND | BPF_K:
432                 case BPF_ALU | BPF_OR | BPF_X:
433                 case BPF_ALU | BPF_OR | BPF_K:
434                 case BPF_ALU | BPF_LSH | BPF_X:
435                 case BPF_ALU | BPF_LSH | BPF_K:
436                 case BPF_ALU | BPF_RSH | BPF_X:
437                 case BPF_ALU | BPF_RSH | BPF_K:
438                 case BPF_ALU | BPF_XOR | BPF_X:
439                 case BPF_ALU | BPF_XOR | BPF_K:
440                 case BPF_ALU | BPF_MUL | BPF_X:
441                 case BPF_ALU | BPF_MUL | BPF_K:
442                 case BPF_ALU | BPF_DIV | BPF_X:
443                 case BPF_ALU | BPF_DIV | BPF_K:
444                 case BPF_ALU | BPF_MOD | BPF_X:
445                 case BPF_ALU | BPF_MOD | BPF_K:
446                 case BPF_ALU | BPF_NEG:
447                 case BPF_LD | BPF_ABS | BPF_W:
448                 case BPF_LD | BPF_ABS | BPF_H:
449                 case BPF_LD | BPF_ABS | BPF_B:
450                 case BPF_LD | BPF_IND | BPF_W:
451                 case BPF_LD | BPF_IND | BPF_H:
452                 case BPF_LD | BPF_IND | BPF_B:
453                         /* Check for overloaded BPF extension and
454                          * directly convert it if found, otherwise
455                          * just move on with mapping.
456                          */
457                         if (BPF_CLASS(fp->code) == BPF_LD &&
458                             BPF_MODE(fp->code) == BPF_ABS &&
459                             convert_bpf_extensions(fp, &insn))
460                                 break;
461 
462                         if (fp->code == (BPF_ALU | BPF_DIV | BPF_X) ||
463                             fp->code == (BPF_ALU | BPF_MOD | BPF_X)) {
464                                 *insn++ = BPF_MOV32_REG(BPF_REG_X, BPF_REG_X);
 465                                 /* Division or modulo by 0: classic BPF
 466                                  * semantics are to always return 0.
 467                                  */
468                                 *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_X, 0, 2);
469                                 *insn++ = BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A);
470                                 *insn++ = BPF_EXIT_INSN();
471                         }
472 
473                         *insn = BPF_RAW_INSN(fp->code, BPF_REG_A, BPF_REG_X, 0, fp->k);
474                         break;
475 
476                 /* Jump transformation cannot use BPF block macros
477                  * everywhere as offset calculation and target updates
478                  * require a bit more work than the rest, i.e. jump
479                  * opcodes map as-is, but offsets need adjustment.
480                  */
481 
482 #define BPF_EMIT_JMP                                                    \
483         do {                                                            \
484                 const s32 off_min = S16_MIN, off_max = S16_MAX;         \
485                 s32 off;                                                \
486                                                                         \
487                 if (target >= len || target < 0)                        \
488                         goto err;                                       \
489                 off = addrs ? addrs[target] - addrs[i] - 1 : 0;         \
490                 /* Adjust pc relative offset for 2nd or 3rd insn. */    \
491                 off -= insn - tmp_insns;                                \
492                 /* Reject anything not fitting into insn->off. */       \
493                 if (off < off_min || off > off_max)                     \
494                         goto err;                                       \
495                 insn->off = off;                                        \
496         } while (0)
497 
498                 case BPF_JMP | BPF_JA:
499                         target = i + fp->k + 1;
500                         insn->code = fp->code;
501                         BPF_EMIT_JMP;
502                         break;
503 
504                 case BPF_JMP | BPF_JEQ | BPF_K:
505                 case BPF_JMP | BPF_JEQ | BPF_X:
506                 case BPF_JMP | BPF_JSET | BPF_K:
507                 case BPF_JMP | BPF_JSET | BPF_X:
508                 case BPF_JMP | BPF_JGT | BPF_K:
509                 case BPF_JMP | BPF_JGT | BPF_X:
510                 case BPF_JMP | BPF_JGE | BPF_K:
511                 case BPF_JMP | BPF_JGE | BPF_X:
512                         if (BPF_SRC(fp->code) == BPF_K && (int) fp->k < 0) {
513                                 /* BPF immediates are signed, zero extend
514                                  * immediate into tmp register and use it
515                                  * in compare insn.
516                                  */
517                                 *insn++ = BPF_MOV32_IMM(BPF_REG_TMP, fp->k);
518 
519                                 insn->dst_reg = BPF_REG_A;
520                                 insn->src_reg = BPF_REG_TMP;
521                                 bpf_src = BPF_X;
522                         } else {
523                                 insn->dst_reg = BPF_REG_A;
524                                 insn->imm = fp->k;
525                                 bpf_src = BPF_SRC(fp->code);
526                                 insn->src_reg = bpf_src == BPF_X ? BPF_REG_X : 0;
527                         }
528 
529                         /* Common case where 'jump_false' is next insn. */
530                         if (fp->jf == 0) {
531                                 insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
532                                 target = i + fp->jt + 1;
533                                 BPF_EMIT_JMP;
534                                 break;
535                         }
536 
537                         /* Convert some jumps when 'jump_true' is next insn. */
538                         if (fp->jt == 0) {
539                                 switch (BPF_OP(fp->code)) {
540                                 case BPF_JEQ:
541                                         insn->code = BPF_JMP | BPF_JNE | bpf_src;
542                                         break;
543                                 case BPF_JGT:
544                                         insn->code = BPF_JMP | BPF_JLE | bpf_src;
545                                         break;
546                                 case BPF_JGE:
547                                         insn->code = BPF_JMP | BPF_JLT | bpf_src;
548                                         break;
549                                 default:
550                                         goto jmp_rest;
551                                 }
552 
553                                 target = i + fp->jf + 1;
554                                 BPF_EMIT_JMP;
555                                 break;
556                         }
557 jmp_rest:
558                         /* Other jumps are mapped into two insns: Jxx and JA. */
559                         target = i + fp->jt + 1;
560                         insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src;
561                         BPF_EMIT_JMP;
562                         insn++;
563 
564                         insn->code = BPF_JMP | BPF_JA;
565                         target = i + fp->jf + 1;
566                         BPF_EMIT_JMP;
567                         break;
568 
 569                 /* ldxb 4 * ([14] & 0xf) is remapped into 6 insns. */
570                 case BPF_LDX | BPF_MSH | BPF_B:
571                         /* tmp = A */
572                         *insn++ = BPF_MOV64_REG(BPF_REG_TMP, BPF_REG_A);
573                         /* A = BPF_R0 = *(u8 *) (skb->data + K) */
574                         *insn++ = BPF_LD_ABS(BPF_B, fp->k);
575                         /* A &= 0xf */
576                         *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 0xf);
577                         /* A <<= 2 */
578                         *insn++ = BPF_ALU32_IMM(BPF_LSH, BPF_REG_A, 2);
579                         /* X = A */
580                         *insn++ = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
581                         /* A = tmp */
582                         *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_TMP);
583                         break;
584 
 585                 /* RET_K is remapped into 2 insns. The RET_A case doesn't need an
586                  * extra mov as BPF_REG_0 is already mapped into BPF_REG_A.
587                  */
588                 case BPF_RET | BPF_A:
589                 case BPF_RET | BPF_K:
590                         if (BPF_RVAL(fp->code) == BPF_K)
591                                 *insn++ = BPF_MOV32_RAW(BPF_K, BPF_REG_0,
592                                                         0, fp->k);
593                         *insn = BPF_EXIT_INSN();
594                         break;
595 
596                 /* Store to stack. */
597                 case BPF_ST:
598                 case BPF_STX:
599                         stack_off = fp->k * 4  + 4;
600                         *insn = BPF_STX_MEM(BPF_W, BPF_REG_FP, BPF_CLASS(fp->code) ==
601                                             BPF_ST ? BPF_REG_A : BPF_REG_X,
602                                             -stack_off);
603                         /* check_load_and_stores() verifies that classic BPF can
604                          * load from stack only after write, so tracking
605                          * stack_depth for ST|STX insns is enough
606                          */
607                         if (new_prog && new_prog->aux->stack_depth < stack_off)
608                                 new_prog->aux->stack_depth = stack_off;
609                         break;
610 
611                 /* Load from stack. */
612                 case BPF_LD | BPF_MEM:
613                 case BPF_LDX | BPF_MEM:
614                         stack_off = fp->k * 4  + 4;
615                         *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD  ?
616                                             BPF_REG_A : BPF_REG_X, BPF_REG_FP,
617                                             -stack_off);
618                         break;
619 
620                 /* A = K or X = K */
621                 case BPF_LD | BPF_IMM:
622                 case BPF_LDX | BPF_IMM:
623                         *insn = BPF_MOV32_IMM(BPF_CLASS(fp->code) == BPF_LD ?
624                                               BPF_REG_A : BPF_REG_X, fp->k);
625                         break;
626 
627                 /* X = A */
628                 case BPF_MISC | BPF_TAX:
629                         *insn = BPF_MOV64_REG(BPF_REG_X, BPF_REG_A);
630                         break;
631 
632                 /* A = X */
633                 case BPF_MISC | BPF_TXA:
634                         *insn = BPF_MOV64_REG(BPF_REG_A, BPF_REG_X);
635                         break;
636 
637                 /* A = skb->len or X = skb->len */
638                 case BPF_LD | BPF_W | BPF_LEN:
639                 case BPF_LDX | BPF_W | BPF_LEN:
640                         *insn = BPF_LDX_MEM(BPF_W, BPF_CLASS(fp->code) == BPF_LD ?
641                                             BPF_REG_A : BPF_REG_X, BPF_REG_CTX,
642                                             offsetof(struct sk_buff, len));
643                         break;
644 
645                 /* Access seccomp_data fields. */
646                 case BPF_LDX | BPF_ABS | BPF_W:
647                         /* A = *(u32 *) (ctx + K) */
648                         *insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX, fp->k);
649                         break;
650 
651                 /* Unknown instruction. */
652                 default:
653                         goto err;
654                 }
655 
656                 insn++;
657                 if (new_prog)
658                         memcpy(new_insn, tmp_insns,
659                                sizeof(*insn) * (insn - tmp_insns));
660                 new_insn += insn - tmp_insns;
661         }
662 
663         if (!new_prog) {
664                 /* Only calculating new length. */
665                 *new_len = new_insn - first_insn;
666                 return 0;
667         }
668 
669         pass++;
670         if (new_flen != new_insn - first_insn) {
671                 new_flen = new_insn - first_insn;
672                 if (pass > 2)
673                         goto err;
674                 goto do_pass;
675         }
676 
677         kfree(addrs);
678         BUG_ON(*new_len != new_flen);
679         return 0;
680 err:
681         kfree(addrs);
682         return -EINVAL;
683 }
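
To make the conversion concrete, the single-instruction classic program "ret #0xffff" (an accept-everything filter) passes through both calls as sketched below (the array name is made up; the output is shown approximately):

/* Illustrative sketch only -- not part of filter.c. */
static const struct sock_filter accept_all_cbpf[] = {
	{ BPF_RET | BPF_K, 0, 0, 0xffff },	/* ret #0xffff */
};
/* After bpf_convert_filter() (length pass, then remap pass),
 * new_prog->insnsi holds roughly:
 *
 *	BPF_ALU32_REG(BPF_XOR, BPF_REG_A, BPF_REG_A)	// prologue: A = 0
 *	BPF_ALU32_REG(BPF_XOR, BPF_REG_X, BPF_REG_X)	// prologue: X = 0
 *	BPF_MOV64_REG(BPF_REG_CTX, BPF_REG_ARG1)	// prologue: CTX = arg1
 *	BPF_MOV32_RAW(BPF_K, BPF_REG_0, 0, 0xffff)	// R0 = 0xffff
 *	BPF_EXIT_INSN()					// return R0
 */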
684 
685 /* Security:
686  *
687  * As we don't want to clear the mem[] array for each packet going through
688  * __bpf_prog_run(), we check that a filter loaded by the user never tries
689  * to read a cell that was not previously written, and we check all branches
690  * to make sure a malicious user doesn't try to abuse us.
691  */
692 static int check_load_and_stores(const struct sock_filter *filter, int flen)
693 {
694         u16 *masks, memvalid = 0; /* One bit per cell, 16 cells */
695         int pc, ret = 0;
696 
697         BUILD_BUG_ON(BPF_MEMWORDS > 16);
698 
699         masks = kmalloc_array(flen, sizeof(*masks), GFP_KERNEL);
700         if (!masks)
701                 return -ENOMEM;
702 
703         memset(masks, 0xff, flen * sizeof(*masks));
704 
705         for (pc = 0; pc < flen; pc++) {
706                 memvalid &= masks[pc];
707 
708                 switch (filter[pc].code) {
709                 case BPF_ST:
710                 case BPF_STX:
711                         memvalid |= (1 << filter[pc].k);
712                         break;
713                 case BPF_LD | BPF_MEM:
714                 case BPF_LDX | BPF_MEM:
715                         if (!(memvalid & (1 << filter[pc].k))) {
716                                 ret = -EINVAL;
717                                 goto error;
718                         }
719                         break;
720                 case BPF_JMP | BPF_JA:
721                         /* A jump must set masks on target */
722                         masks[pc + 1 + filter[pc].k] &= memvalid;
723                         memvalid = ~0;
724                         break;
725                 case BPF_JMP | BPF_JEQ | BPF_K:
726                 case BPF_JMP | BPF_JEQ | BPF_X:
727                 case BPF_JMP | BPF_JGE | BPF_K:
728                 case BPF_JMP | BPF_JGE | BPF_X:
729                 case BPF_JMP | BPF_JGT | BPF_K:
730                 case BPF_JMP | BPF_JGT | BPF_X:
731                 case BPF_JMP | BPF_JSET | BPF_K:
732                 case BPF_JMP | BPF_JSET | BPF_X:
733                         /* A jump must set masks on targets */
734                         masks[pc + 1 + filter[pc].jt] &= memvalid;
735                         masks[pc + 1 + filter[pc].jf] &= memvalid;
736                         memvalid = ~0;
737                         break;
738                 }
739         }
740 error:
741         kfree(masks);
742         return ret;
743 }
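
For example, a filter that reads scratch cell mem[0] before anything has been stored there is rejected here with -EINVAL (illustrative program, hypothetical name):

/* Illustrative sketch only -- not part of filter.c. */
static const struct sock_filter reads_before_write[] = {
	{ BPF_LD  | BPF_MEM, 0, 0, 0 },	/* A = mem[0], never written */
	{ BPF_RET | BPF_A,   0, 0, 0 },	/* return A */
};
/* check_load_and_stores(reads_before_write, 2) returns -EINVAL. */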
744 
745 static bool chk_code_allowed(u16 code_to_probe)
746 {
747         static const bool codes[] = {
748                 /* 32 bit ALU operations */
749                 [BPF_ALU | BPF_ADD | BPF_K] = true,
750                 [BPF_ALU | BPF_ADD | BPF_X] = true,
751                 [BPF_ALU | BPF_SUB | BPF_K] = true,
752                 [BPF_ALU | BPF_SUB | BPF_X] = true,
753                 [BPF_ALU | BPF_MUL | BPF_K] = true,
754                 [BPF_ALU | BPF_MUL | BPF_X] = true,
755                 [BPF_ALU | BPF_DIV | BPF_K] = true,
756                 [BPF_ALU | BPF_DIV | BPF_X] = true,
757                 [BPF_ALU | BPF_MOD | BPF_K] = true,
758                 [BPF_ALU | BPF_MOD | BPF_X] = true,
759                 [BPF_ALU | BPF_AND | BPF_K] = true,
760                 [BPF_ALU | BPF_AND | BPF_X] = true,
761                 [BPF_ALU | BPF_OR | BPF_K] = true,
762                 [BPF_ALU | BPF_OR | BPF_X] = true,
763                 [BPF_ALU | BPF_XOR | BPF_K] = true,
764                 [BPF_ALU | BPF_XOR | BPF_X] = true,
765                 [BPF_ALU | BPF_LSH | BPF_K] = true,
766                 [BPF_ALU | BPF_LSH | BPF_X] = true,
767                 [BPF_ALU | BPF_RSH | BPF_K] = true,
768                 [BPF_ALU | BPF_RSH | BPF_X] = true,
769                 [BPF_ALU | BPF_NEG] = true,
770                 /* Load instructions */
771                 [BPF_LD | BPF_W | BPF_ABS] = true,
772                 [BPF_LD | BPF_H | BPF_ABS] = true,
773                 [BPF_LD | BPF_B | BPF_ABS] = true,
774                 [BPF_LD | BPF_W | BPF_LEN] = true,
775                 [BPF_LD | BPF_W | BPF_IND] = true,
776                 [BPF_LD | BPF_H | BPF_IND] = true,
777                 [BPF_LD | BPF_B | BPF_IND] = true,
778                 [BPF_LD | BPF_IMM] = true,
779                 [BPF_LD | BPF_MEM] = true,
780                 [BPF_LDX | BPF_W | BPF_LEN] = true,
781                 [BPF_LDX | BPF_B | BPF_MSH] = true,
782                 [BPF_LDX | BPF_IMM] = true,
783                 [BPF_LDX | BPF_MEM] = true,
784                 /* Store instructions */
785                 [BPF_ST] = true,
786                 [BPF_STX] = true,
787                 /* Misc instructions */
788                 [BPF_MISC | BPF_TAX] = true,
789                 [BPF_MISC | BPF_TXA] = true,
790                 /* Return instructions */
791                 [BPF_RET | BPF_K] = true,
792                 [BPF_RET | BPF_A] = true,
793                 /* Jump instructions */
794                 [BPF_JMP | BPF_JA] = true,
795                 [BPF_JMP | BPF_JEQ | BPF_K] = true,
796                 [BPF_JMP | BPF_JEQ | BPF_X] = true,
797                 [BPF_JMP | BPF_JGE | BPF_K] = true,
798                 [BPF_JMP | BPF_JGE | BPF_X] = true,
799                 [BPF_JMP | BPF_JGT | BPF_K] = true,
800                 [BPF_JMP | BPF_JGT | BPF_X] = true,
801                 [BPF_JMP | BPF_JSET | BPF_K] = true,
802                 [BPF_JMP | BPF_JSET | BPF_X] = true,
803         };
804 
805         if (code_to_probe >= ARRAY_SIZE(codes))
806                 return false;
807 
808         return codes[code_to_probe];
809 }
810 
811 static bool bpf_check_basics_ok(const struct sock_filter *filter,
812                                 unsigned int flen)
813 {
814         if (filter == NULL)
815                 return false;
816         if (flen == 0 || flen > BPF_MAXINSNS)
817                 return false;
818 
819         return true;
820 }
821 
822 /**
823  *      bpf_check_classic - verify socket filter code
824  *      @filter: filter to verify
825  *      @flen: length of filter
826  *
827  * Check the user's filter code. If we let some ugly
828  * filter code slip through, kaboom! The filter must contain
829  * no references or jumps that are out of range, no illegal
830  * instructions, and must end with a RET instruction.
831  *
832  * All jumps are forward as they are not signed.
833  *
834  * Returns 0 if the rule set is legal or -EINVAL if not.
835  */
836 static int bpf_check_classic(const struct sock_filter *filter,
837                              unsigned int flen)
838 {
839         bool anc_found;
840         int pc;
841 
842         /* Check the filter code now */
843         for (pc = 0; pc < flen; pc++) {
844                 const struct sock_filter *ftest = &filter[pc];
845 
846                 /* May we actually operate on this code? */
847                 if (!chk_code_allowed(ftest->code))
848                         return -EINVAL;
849 
850                 /* Some instructions need special checks */
851                 switch (ftest->code) {
852                 case BPF_ALU | BPF_DIV | BPF_K:
853                 case BPF_ALU | BPF_MOD | BPF_K:
854                         /* Check for division by zero */
855                         if (ftest->k == 0)
856                                 return -EINVAL;
857                         break;
858                 case BPF_ALU | BPF_LSH | BPF_K:
859                 case BPF_ALU | BPF_RSH | BPF_K:
860                         if (ftest->k >= 32)
861                                 return -EINVAL;
862                         break;
863                 case BPF_LD | BPF_MEM:
864                 case BPF_LDX | BPF_MEM:
865                 case BPF_ST:
866                 case BPF_STX:
867                         /* Check for invalid memory addresses */
868                         if (ftest->k >= BPF_MEMWORDS)
869                                 return -EINVAL;
870                         break;
871                 case BPF_JMP | BPF_JA:
872                         /* Note that a large ftest->k might cause loops.
873                          * Compare this with conditional jumps below,
874                          * where offsets are limited. --ANK (981016)
875                          */
876                         if (ftest->k >= (unsigned int)(flen - pc - 1))
877                                 return -EINVAL;
878                         break;
879                 case BPF_JMP | BPF_JEQ | BPF_K:
880                 case BPF_JMP | BPF_JEQ | BPF_X:
881                 case BPF_JMP | BPF_JGE | BPF_K:
882                 case BPF_JMP | BPF_JGE | BPF_X:
883                 case BPF_JMP | BPF_JGT | BPF_K:
884                 case BPF_JMP | BPF_JGT | BPF_X:
885                 case BPF_JMP | BPF_JSET | BPF_K:
886                 case BPF_JMP | BPF_JSET | BPF_X:
887                         /* Both conditionals must be safe */
888                         if (pc + ftest->jt + 1 >= flen ||
889                             pc + ftest->jf + 1 >= flen)
890                                 return -EINVAL;
891                         break;
892                 case BPF_LD | BPF_W | BPF_ABS:
893                 case BPF_LD | BPF_H | BPF_ABS:
894                 case BPF_LD | BPF_B | BPF_ABS:
895                         anc_found = false;
896                         if (bpf_anc_helper(ftest) & BPF_ANC)
897                                 anc_found = true;
898                         /* Ancillary operation unknown or unsupported */
899                         if (anc_found == false && ftest->k >= SKF_AD_OFF)
900                                 return -EINVAL;
901                 }
902         }
903 
904         /* Last instruction must be a RET code */
905         switch (filter[flen - 1].code) {
906         case BPF_RET | BPF_K:
907         case BPF_RET | BPF_A:
908                 return check_load_and_stores(filter, flen);
909         }
910 
911         return -EINVAL;
912 }
913 
914 static int bpf_prog_store_orig_filter(struct bpf_prog *fp,
915                                       const struct sock_fprog *fprog)
916 {
917         unsigned int fsize = bpf_classic_proglen(fprog);
918         struct sock_fprog_kern *fkprog;
919 
920         fp->orig_prog = kmalloc(sizeof(*fkprog), GFP_KERNEL);
921         if (!fp->orig_prog)
922                 return -ENOMEM;
923 
924         fkprog = fp->orig_prog;
925         fkprog->len = fprog->len;
926 
927         fkprog->filter = kmemdup(fp->insns, fsize,
928                                  GFP_KERNEL | __GFP_NOWARN);
929         if (!fkprog->filter) {
930                 kfree(fp->orig_prog);
931                 return -ENOMEM;
932         }
933 
934         return 0;
935 }
936 
937 static void bpf_release_orig_filter(struct bpf_prog *fp)
938 {
939         struct sock_fprog_kern *fprog = fp->orig_prog;
940 
941         if (fprog) {
942                 kfree(fprog->filter);
943                 kfree(fprog);
944         }
945 }
946 
947 static void __bpf_prog_release(struct bpf_prog *prog)
948 {
949         if (prog->type == BPF_PROG_TYPE_SOCKET_FILTER) {
950                 bpf_prog_put(prog);
951         } else {
952                 bpf_release_orig_filter(prog);
953                 bpf_prog_free(prog);
954         }
955 }
956 
957 static void __sk_filter_release(struct sk_filter *fp)
958 {
959         __bpf_prog_release(fp->prog);
960         kfree(fp);
961 }
962 
963 /**
964  *      sk_filter_release_rcu - Release a socket filter by rcu_head
965  *      @rcu: rcu_head that contains the sk_filter to free
966  */
967 static void sk_filter_release_rcu(struct rcu_head *rcu)
968 {
969         struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
970 
971         __sk_filter_release(fp);
972 }
973 
974 /**
975  *      sk_filter_release - release a socket filter
976  *      @fp: filter to remove
977  *
978  *      Remove a filter from a socket and release its resources.
979  */
980 static void sk_filter_release(struct sk_filter *fp)
981 {
982         if (refcount_dec_and_test(&fp->refcnt))
983                 call_rcu(&fp->rcu, sk_filter_release_rcu);
984 }
985 
986 void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
987 {
988         u32 filter_size = bpf_prog_size(fp->prog->len);
989 
990         atomic_sub(filter_size, &sk->sk_omem_alloc);
991         sk_filter_release(fp);
992 }
993 
994 /* Try to charge the socket memory if there is space available;
995  * return true on success.
996  */
997 static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
998 {
999         u32 filter_size = bpf_prog_size(fp->prog->len);
1000 
1001         /* same check as in sock_kmalloc() */
1002         if (filter_size <= sysctl_optmem_max &&
1003             atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
1004                 atomic_add(filter_size, &sk->sk_omem_alloc);
1005                 return true;
1006         }
1007         return false;
1008 }
1009 
1010 bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
1011 {
1012         if (!refcount_inc_not_zero(&fp->refcnt))
1013                 return false;
1014 
1015         if (!__sk_filter_charge(sk, fp)) {
1016                 sk_filter_release(fp);
1017                 return false;
1018         }
1019         return true;
1020 }
1021 
1022 static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
1023 {
1024         struct sock_filter *old_prog;
1025         struct bpf_prog *old_fp;
1026         int err, new_len, old_len = fp->len;
1027 
1028         /* We are free to overwrite insns et al. right here, as they
1029          * won't be used internally anymore once the migration to
1030          * the internal BPF instruction representation has taken
1031          * place.
1032          */
1033         BUILD_BUG_ON(sizeof(struct sock_filter) !=
1034                      sizeof(struct bpf_insn));
1035 
1036         /* Conversion cannot happen on overlapping memory areas,
1037          * so we need to keep the user BPF around until the 2nd
1038          * pass. At this time, the user BPF is stored in fp->insns.
1039          */
1040         old_prog = kmemdup(fp->insns, old_len * sizeof(struct sock_filter),
1041                            GFP_KERNEL | __GFP_NOWARN);
1042         if (!old_prog) {
1043                 err = -ENOMEM;
1044                 goto out_err;
1045         }
1046 
1047         /* 1st pass: calculate the new program length. */
1048         err = bpf_convert_filter(old_prog, old_len, NULL, &new_len);
1049         if (err)
1050                 goto out_err_free;
1051 
1052         /* Expand fp for appending the new filter representation. */
1053         old_fp = fp;
1054         fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0);
1055         if (!fp) {
1056                 /* The old_fp is still around in case we couldn't
1057                  * allocate new memory, so uncharge on that one.
1058                  */
1059                 fp = old_fp;
1060                 err = -ENOMEM;
1061                 goto out_err_free;
1062         }
1063 
1064         fp->len = new_len;
1065 
1066         /* 2nd pass: remap sock_filter insns into bpf_insn insns. */
1067         err = bpf_convert_filter(old_prog, old_len, fp, &new_len);
1068         if (err)
1069                 /* The 2nd bpf_convert_filter() can only fail if it
1070                  * fails to allocate memory; the remapping itself must
1071                  * succeed. Note that at this point old_fp has already
1072                  * been released by krealloc().
1073                  */
1074                 goto out_err_free;
1075 
1076         fp = bpf_prog_select_runtime(fp, &err);
1077         if (err)
1078                 goto out_err_free;
1079 
1080         kfree(old_prog);
1081         return fp;
1082 
1083 out_err_free:
1084         kfree(old_prog);
1085 out_err:
1086         __bpf_prog_release(fp);
1087         return ERR_PTR(err);
1088 }
1089 
1090 static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp,
1091                                            bpf_aux_classic_check_t trans)
1092 {
1093         int err;
1094 
1095         fp->bpf_func = NULL;
1096         fp->jited = 0;
1097 
1098         err = bpf_check_classic(fp->insns, fp->len);
1099         if (err) {
1100                 __bpf_prog_release(fp);
1101                 return ERR_PTR(err);
1102         }
1103 
1104         /* There might be additional checks and transformations
1105          * needed on classic filters, e.g. in the case of seccomp.
1106          */
1107         if (trans) {
1108                 err = trans(fp->insns, fp->len);
1109                 if (err) {
1110                         __bpf_prog_release(fp);
1111                         return ERR_PTR(err);
1112                 }
1113         }
1114 
1115         /* Probe if we can JIT compile the filter and if so, do
1116          * the compilation of the filter.
1117          */
1118         bpf_jit_compile(fp);
1119 
1120         /* JIT compiler couldn't process this filter, so do the
1121          * internal BPF translation for the optimized interpreter.
1122          */
1123         if (!fp->jited)
1124                 fp = bpf_migrate_filter(fp);
1125 
1126         return fp;
1127 }
1128 
1129 /**
1130  *      bpf_prog_create - create an unattached filter
1131  *      @pfp: the unattached filter that is created
1132  *      @fprog: the filter program
1133  *
1134  * Create a filter independent of any socket. We first run some
1135  * sanity checks on it to make sure it does not explode on us later.
1136  * If an error occurs or there is insufficient memory for the filter
1137  * a negative errno code is returned. On success the return is zero.
1138  */
1139 int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog)
1140 {
1141         unsigned int fsize = bpf_classic_proglen(fprog);
1142         struct bpf_prog *fp;
1143 
1144         /* Make sure the new filter is there and of a valid length. */
1145         if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1146                 return -EINVAL;
1147 
1148         fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1149         if (!fp)
1150                 return -ENOMEM;
1151 
1152         memcpy(fp->insns, fprog->filter, fsize);
1153 
1154         fp->len = fprog->len;
1155         /* Since unattached filters are not copied back to user
1156          * space through sk_get_filter(), we do not need to hold
1157          * a copy here, and can spare ourselves the work.
1158          */
1159         fp->orig_prog = NULL;
1160 
1161         /* bpf_prepare_filter() already takes care of freeing
1162          * memory in case something goes wrong.
1163          */
1164         fp = bpf_prepare_filter(fp, NULL);
1165         if (IS_ERR(fp))
1166                 return PTR_ERR(fp);
1167 
1168         *pfp = fp;
1169         return 0;
1170 }
1171 EXPORT_SYMBOL_GPL(bpf_prog_create);
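
In-kernel users typically hand this function a static classic program wrapped in a struct sock_fprog_kern. A minimal, purely illustrative caller (example_create_filter() is hypothetical):

/* Illustrative sketch only -- not part of filter.c. */
static int example_create_filter(struct bpf_prog **pfp)
{
	static struct sock_filter insns[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },	/* accept everything */
	};
	struct sock_fprog_kern fprog = {
		.len	= ARRAY_SIZE(insns),
		.filter	= insns,
	};

	/* On success *pfp is ready to be run; the caller later
	 * frees it with bpf_prog_destroy(*pfp).
	 */
	return bpf_prog_create(pfp, &fprog);
}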
1172 
1173 /**
1174  *      bpf_prog_create_from_user - create an unattached filter from user buffer
1175  *      @pfp: the unattached filter that is created
1176  *      @fprog: the filter program
1177  *      @trans: post-classic verifier transformation handler
1178  *      @save_orig: save classic BPF program
1179  *
1180  * This function effectively does the same as bpf_prog_create(), except
1181  * that it builds up its insns buffer from a user-space-provided buffer.
1182  * It also allows for passing a bpf_aux_classic_check_t handler.
1183  */
1184 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
1185                               bpf_aux_classic_check_t trans, bool save_orig)
1186 {
1187         unsigned int fsize = bpf_classic_proglen(fprog);
1188         struct bpf_prog *fp;
1189         int err;
1190 
1191         /* Make sure the new filter is there and of a valid length. */
1192         if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1193                 return -EINVAL;
1194 
1195         fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1196         if (!fp)
1197                 return -ENOMEM;
1198 
1199         if (copy_from_user(fp->insns, fprog->filter, fsize)) {
1200                 __bpf_prog_free(fp);
1201                 return -EFAULT;
1202         }
1203 
1204         fp->len = fprog->len;
1205         fp->orig_prog = NULL;
1206 
1207         if (save_orig) {
1208                 err = bpf_prog_store_orig_filter(fp, fprog);
1209                 if (err) {
1210                         __bpf_prog_free(fp);
1211                         return -ENOMEM;
1212                 }
1213         }
1214 
1215         /* bpf_prepare_filter() already takes care of freeing
1216          * memory in case something goes wrong.
1217          */
1218         fp = bpf_prepare_filter(fp, trans);
1219         if (IS_ERR(fp))
1220                 return PTR_ERR(fp);
1221 
1222         *pfp = fp;
1223         return 0;
1224 }
1225 EXPORT_SYMBOL_GPL(bpf_prog_create_from_user);
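
Seccomp is the main in-tree caller of this variant: it supplies its own bpf_aux_classic_check_t hook so classic loads can be validated and rewritten before the program is finalized. A schematic, purely hypothetical caller with the hook reduced to a no-op:

/* Illustrative sketch only -- not part of filter.c. */
static int example_classic_check(struct sock_filter *filter, unsigned int flen)
{
	return 0;	/* a real hook would validate/rewrite insns here */
}

static int example_attach_from_user(struct sock_fprog *ufprog,
				    struct bpf_prog **pfp)
{
	return bpf_prog_create_from_user(pfp, ufprog, example_classic_check,
					 false /* don't save classic copy */);
}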
1226 
1227 void bpf_prog_destroy(struct bpf_prog *fp)
1228 {
1229         __bpf_prog_release(fp);
1230 }
1231 EXPORT_SYMBOL_GPL(bpf_prog_destroy);
1232 
1233 static int __sk_attach_prog(struct bpf_prog *prog, struct sock *sk)
1234 {
1235         struct sk_filter *fp, *old_fp;
1236 
1237         fp = kmalloc(sizeof(*fp), GFP_KERNEL);
1238         if (!fp)
1239                 return -ENOMEM;
1240 
1241         fp->prog = prog;
1242 
1243         if (!__sk_filter_charge(sk, fp)) {
1244                 kfree(fp);
1245                 return -ENOMEM;
1246         }
1247         refcount_set(&fp->refcnt, 1);
1248 
1249         old_fp = rcu_dereference_protected(sk->sk_filter,
1250                                            lockdep_sock_is_held(sk));
1251         rcu_assign_pointer(sk->sk_filter, fp);
1252 
1253         if (old_fp)
1254                 sk_filter_uncharge(sk, old_fp);
1255 
1256         return 0;
1257 }
1258 
1259 static int __reuseport_attach_prog(struct bpf_prog *prog, struct sock *sk)
1260 {
1261         struct bpf_prog *old_prog;
1262         int err;
1263 
1264         if (bpf_prog_size(prog->len) > sysctl_optmem_max)
1265                 return -ENOMEM;
1266 
1267         if (sk_unhashed(sk) && sk->sk_reuseport) {
1268                 err = reuseport_alloc(sk);
1269                 if (err)
1270                         return err;
1271         } else if (!rcu_access_pointer(sk->sk_reuseport_cb)) {
1272                 /* The socket wasn't bound with SO_REUSEPORT */
1273                 return -EINVAL;
1274         }
1275 
1276         old_prog = reuseport_attach_prog(sk, prog);
1277         if (old_prog)
1278                 bpf_prog_destroy(old_prog);
1279 
1280         return 0;
1281 }
1282 
1283 static
1284 struct bpf_prog *__get_filter(struct sock_fprog *fprog, struct sock *sk)
1285 {
1286         unsigned int fsize = bpf_classic_proglen(fprog);
1287         struct bpf_prog *prog;
1288         int err;
1289 
1290         if (sock_flag(sk, SOCK_FILTER_LOCKED))
1291                 return ERR_PTR(-EPERM);
1292 
1293         /* Make sure the new filter is there and of a valid length. */
1294         if (!bpf_check_basics_ok(fprog->filter, fprog->len))
1295                 return ERR_PTR(-EINVAL);
1296 
1297         prog = bpf_prog_alloc(bpf_prog_size(fprog->len), 0);
1298         if (!prog)
1299                 return ERR_PTR(-ENOMEM);
1300 
1301         if (copy_from_user(prog->insns, fprog->filter, fsize)) {
1302                 __bpf_prog_free(prog);
1303                 return ERR_PTR(-EFAULT);
1304         }
1305 
1306         prog->len = fprog->len;
1307 
1308         err = bpf_prog_store_orig_filter(prog, fprog);
1309         if (err) {
1310                 __bpf_prog_free(prog);
1311                 return ERR_PTR(-ENOMEM);
1312         }
1313 
1314         /* bpf_prepare_filter() already takes care of freeing
1315          * memory in case something goes wrong.
1316          */
1317         return bpf_prepare_filter(prog, NULL);
1318 }
1319 
1320 /**
1321  *      sk_attach_filter - attach a socket filter
1322  *      @fprog: the filter program
1323  *      @sk: the socket to use
1324  *
1325  * Attach the user's filter code. We first run some sanity checks on
1326  * it to make sure it does not explode on us later. If an error
1327  * occurs or there is insufficient memory for the filter a negative
1328  * errno code is returned. On success the return is zero.
1329  */
1330 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1331 {
1332         struct bpf_prog *prog = __get_filter(fprog, sk);
1333         int err;
1334 
1335         if (IS_ERR(prog))
1336                 return PTR_ERR(prog);
1337 
1338         err = __sk_attach_prog(prog, sk);
1339         if (err < 0) {
1340                 __bpf_prog_release(prog);
1341                 return err;
1342         }
1343 
1344         return 0;
1345 }
1346 EXPORT_SYMBOL_GPL(sk_attach_filter);
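
From user space this function is reached through setsockopt(SO_ATTACH_FILTER) in sock_setsockopt(). A minimal, illustrative user-space program that attaches an accept-all classic filter:

/* Illustrative user-space counterpart -- not part of filter.c. */
#include <sys/socket.h>
#include <linux/filter.h>

static int attach_accept_all(int fd)
{
	struct sock_filter insns[] = {
		{ BPF_RET | BPF_K, 0, 0, 0xffffffff },	/* accept packet */
	};
	struct sock_fprog fprog = {
		.len	= sizeof(insns) / sizeof(insns[0]),
		.filter	= insns,
	};

	/* Kernel side: sock_setsockopt() -> sk_attach_filter(). */
	return setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER,
			  &fprog, sizeof(fprog));
}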
1347 
1348 int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk)
1349 {
1350         struct bpf_prog *prog = __get_filter(fprog, sk);
1351         int err;
1352 
1353         if (IS_ERR(prog))
1354                 return PTR_ERR(prog);
1355 
1356         err = __reuseport_attach_prog(prog, sk);
1357         if (err < 0) {
1358                 __bpf_prog_release(prog);
1359                 return err;
1360         }
1361 
1362         return 0;
1363 }
1364 
1365 static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk)
1366 {
1367         if (sock_flag(sk, SOCK_FILTER_LOCKED))
1368                 return ERR_PTR(-EPERM);
1369 
1370         return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER);
1371 }
1372 
1373 int sk_attach_bpf(u32 ufd, struct sock *sk)
1374 {
1375         struct bpf_prog *prog = __get_bpf(ufd, sk);
1376         int err;
1377 
1378         if (IS_ERR(prog))
1379                 return PTR_ERR(prog);
1380 
1381         err = __sk_attach_prog(prog, sk);
1382         if (err < 0) {
1383                 bpf_prog_put(prog);
1384                 return err;
1385         }
1386 
1387         return 0;
1388 }
1389 
1390 int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk)
1391 {
1392         struct bpf_prog *prog = __get_bpf(ufd, sk);
1393         int err;
1394 
1395         if (IS_ERR(prog))
1396                 return PTR_ERR(prog);
1397 
1398         err = __reuseport_attach_prog(prog, sk);
1399         if (err < 0) {
1400                 bpf_prog_put(prog);
1401                 return err;
1402         }
1403 
1404         return 0;
1405 }
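
/* Usage sketch (illustrative only): userspace reaches sk_attach_filter()
 * via setsockopt(SO_ATTACH_FILTER) with a classic BPF program, and
 * sk_attach_bpf() via setsockopt(SO_ATTACH_BPF) with an eBPF prog fd from
 * bpf(BPF_PROG_LOAD, ...). A minimal classic filter, assuming an already
 * created socket fd named "fd":
 *
 *	#include <sys/socket.h>
 *	#include <linux/filter.h>
 *
 *	// Accept every packet, truncated to at most 64 bytes.
 *	struct sock_filter code[] = {
 *		BPF_STMT(BPF_RET | BPF_K, 64),
 *	};
 *	struct sock_fprog fprog = {
 *		.len	= sizeof(code) / sizeof(code[0]),
 *		.filter	= code,
 *	};
 *
 *	setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &fprog, sizeof(fprog));
 *
 * The SO_ATTACH_REUSEPORT_CBPF/SO_ATTACH_REUSEPORT_EBPF options take the
 * same inputs but land in the sk_reuseport_attach_*() variants above.
 */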
1406 
1407 struct bpf_scratchpad {
1408         union {
1409                 __be32 diff[MAX_BPF_STACK / sizeof(__be32)];
1410                 u8     buff[MAX_BPF_STACK];
1411         };
1412 };
1413 
1414 static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp);
1415 
1416 static inline int __bpf_try_make_writable(struct sk_buff *skb,
1417                                           unsigned int write_len)
1418 {
1419         return skb_ensure_writable(skb, write_len);
1420 }
1421 
1422 static inline int bpf_try_make_writable(struct sk_buff *skb,
1423                                         unsigned int write_len)
1424 {
1425         int err = __bpf_try_make_writable(skb, write_len);
1426 
1427         bpf_compute_data_pointers(skb);
1428         return err;
1429 }
1430 
1431 static int bpf_try_make_head_writable(struct sk_buff *skb)
1432 {
1433         return bpf_try_make_writable(skb, skb_headlen(skb));
1434 }
1435 
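/* At tc ingress the mac header has already been pulled, so for skbs with
 * CHECKSUM_COMPLETE it is no longer covered by skb->csum. The two helpers
 * below add the mac header back into the checksum before operations that
 * act on the full mac frame (e.g. vlan push/pop) and remove it afterwards.
 */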
1436 static inline void bpf_push_mac_rcsum(struct sk_buff *skb)
1437 {
1438         if (skb_at_tc_ingress(skb))
1439                 skb_postpush_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1440 }
1441 
1442 static inline void bpf_pull_mac_rcsum(struct sk_buff *skb)
1443 {
1444         if (skb_at_tc_ingress(skb))
1445                 skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len);
1446 }
1447 
1448 BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset,
1449            const void *, from, u32, len, u64, flags)
1450 {
1451         void *ptr;
1452 
1453         if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH)))
1454                 return -EINVAL;
1455         if (unlikely(offset > 0xffff))
1456                 return -EFAULT;
1457         if (unlikely(bpf_try_make_writable(skb, offset + len)))
1458                 return -EFAULT;
1459 
1460         ptr = skb->data + offset;
1461         if (flags & BPF_F_RECOMPUTE_CSUM)
1462                 __skb_postpull_rcsum(skb, ptr, len, offset);
1463 
1464         memcpy(ptr, from, len);
1465 
1466         if (flags & BPF_F_RECOMPUTE_CSUM)
1467                 __skb_postpush_rcsum(skb, ptr, len, offset);
1468         if (flags & BPF_F_INVALIDATE_HASH)
1469                 skb_clear_hash(skb);
1470 
1471         return 0;
1472 }
1473 
1474 static const struct bpf_func_proto bpf_skb_store_bytes_proto = {
1475         .func           = bpf_skb_store_bytes,
1476         .gpl_only       = false,
1477         .ret_type       = RET_INTEGER,
1478         .arg1_type      = ARG_PTR_TO_CTX,
1479         .arg2_type      = ARG_ANYTHING,
1480         .arg3_type      = ARG_PTR_TO_MEM,
1481         .arg4_type      = ARG_CONST_SIZE,
1482         .arg5_type      = ARG_ANYTHING,
1483 };
1484 
1485 BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset,
1486            void *, to, u32, len)
1487 {
1488         void *ptr;
1489 
1490         if (unlikely(offset > 0xffff))
1491                 goto err_clear;
1492 
1493         ptr = skb_header_pointer(skb, offset, len, to);
1494         if (unlikely(!ptr))
1495                 goto err_clear;
1496         if (ptr != to)
1497                 memcpy(to, ptr, len);
1498 
1499         return 0;
1500 err_clear:
1501         memset(to, 0, len);
1502         return -EFAULT;
1503 }
1504 
1505 static const struct bpf_func_proto bpf_skb_load_bytes_proto = {
1506         .func           = bpf_skb_load_bytes,
1507         .gpl_only       = false,
1508         .ret_type       = RET_INTEGER,
1509         .arg1_type      = ARG_PTR_TO_CTX,
1510         .arg2_type      = ARG_ANYTHING,
1511         .arg3_type      = ARG_PTR_TO_UNINIT_MEM,
1512         .arg4_type      = ARG_CONST_SIZE,
1513 };
1514 
1515 BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len)
1516 {
1517         /* The idea is the following: should the needed direct read/write
1518          * test fail at runtime, we can pull in more data and redo the
1519          * test, since implicitly we invalidate the previous checks here.
1520          *
1521          * Or, since we know how much we need to make readable/writable,
1522          * this can be done once at the beginning of the program for the
1523          * direct access case. This overcomes the limitation that only
1524          * the current headroom is accessible.
1525          */
1526         return bpf_try_make_writable(skb, len ? : skb_headlen(skb));
1527 }
1528 
1529 static const struct bpf_func_proto bpf_skb_pull_data_proto = {
1530         .func           = bpf_skb_pull_data,
1531         .gpl_only       = false,
1532         .ret_type       = RET_INTEGER,
1533         .arg1_type      = ARG_PTR_TO_CTX,
1534         .arg2_type      = ARG_ANYTHING,
1535 };
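
/* Usage sketch (illustrative only, section name and sizes are examples):
 * a tc/BPF program can use bpf_skb_pull_data() as described above to make
 * the bytes it needs directly accessible, then re-derive data/data_end:
 *
 *	SEC("classifier")
 *	int cls_pull(struct __sk_buff *skb)
 *	{
 *		void *data, *data_end;
 *
 *		// Make at least the first 64 bytes readable and writable.
 *		if (bpf_skb_pull_data(skb, 64))
 *			return TC_ACT_OK;
 *
 *		data	 = (void *)(long)skb->data;
 *		data_end = (void *)(long)skb->data_end;
 *		if (data + 64 > data_end)
 *			return TC_ACT_OK;
 *
 *		// ... direct reads/writes within the first 64 bytes ...
 *		return TC_ACT_OK;
 *	}
 */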
1536 
1537 BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset,
1538            u64, from, u64, to, u64, flags)
1539 {
1540         __sum16 *ptr;
1541 
1542         if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK)))
1543                 return -EINVAL;
1544         if (unlikely(offset > 0xffff || offset & 1))
1545                 return -EFAULT;
1546         if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1547                 return -EFAULT;
1548 
1549         ptr = (__sum16 *)(skb->data + offset);
1550         switch (flags & BPF_F_HDR_FIELD_MASK) {
1551         case 0:
1552                 if (unlikely(from != 0))
1553                         return -EINVAL;
1554 
1555                 csum_replace_by_diff(ptr, to);
1556                 break;
1557         case 2:
1558                 csum_replace2(ptr, from, to);
1559                 break;
1560         case 4:
1561                 csum_replace4(ptr, from, to);
1562                 break;
1563         default:
1564                 return -EINVAL;
1565         }
1566 
1567         return 0;
1568 }
1569 
1570 static const struct bpf_func_proto bpf_l3_csum_replace_proto = {
1571         .func           = bpf_l3_csum_replace,
1572         .gpl_only       = false,
1573         .ret_type       = RET_INTEGER,
1574         .arg1_type      = ARG_PTR_TO_CTX,
1575         .arg2_type      = ARG_ANYTHING,
1576         .arg3_type      = ARG_ANYTHING,
1577         .arg4_type      = ARG_ANYTHING,
1578         .arg5_type      = ARG_ANYTHING,
1579 };
1580 
1581 BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset,
1582            u64, from, u64, to, u64, flags)
1583 {
1584         bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
1585         bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
1586         bool do_mforce = flags & BPF_F_MARK_ENFORCE;
1587         __sum16 *ptr;
1588 
1589         if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
1590                                BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
1591                 return -EINVAL;
1592         if (unlikely(offset > 0xffff || offset & 1))
1593                 return -EFAULT;
1594         if (unlikely(bpf_try_make_writable(skb, offset + sizeof(*ptr))))
1595                 return -EFAULT;
1596 
1597         ptr = (__sum16 *)(skb->data + offset);
1598         if (is_mmzero && !do_mforce && !*ptr)
1599                 return 0;
1600 
1601         switch (flags & BPF_F_HDR_FIELD_MASK) {
1602         case 0:
1603                 if (unlikely(from != 0))
1604                         return -EINVAL;
1605 
1606                 inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
1607                 break;
1608         case 2:
1609                 inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
1610                 break;
1611         case 4:
1612                 inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
1613                 break;
1614         default:
1615                 return -EINVAL;
1616         }
1617 
1618         if (is_mmzero && !*ptr)
1619                 *ptr = CSUM_MANGLED_0;
1620         return 0;
1621 }
1622 
1623 static const struct bpf_func_proto bpf_l4_csum_replace_proto = {
1624         .func           = bpf_l4_csum_replace,
1625         .gpl_only       = false,
1626         .ret_type       = RET_INTEGER,
1627         .arg1_type      = ARG_PTR_TO_CTX,
1628         .arg2_type      = ARG_ANYTHING,
1629         .arg3_type      = ARG_ANYTHING,
1630         .arg4_type      = ARG_ANYTHING,
1631         .arg5_type      = ARG_ANYTHING,
1632 };
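
/* Usage sketch (illustrative only): a common pattern for the two replace
 * helpers above is rewriting a 4-byte IPv4 address and fixing up both the
 * IP and TCP checksums. Offsets assume Ethernet + IPv4 without options +
 * TCP; old_ip/new_ip are values provided by the program:
 *
 *	#define IP_DST_OFF	(ETH_HLEN + offsetof(struct iphdr, daddr))
 *	#define IP_CSUM_OFF	(ETH_HLEN + offsetof(struct iphdr, check))
 *	#define TCP_CSUM_OFF	(ETH_HLEN + sizeof(struct iphdr) + \
 *				 offsetof(struct tcphdr, check))
 *
 *	// TCP checksum covers the pseudo header, hence BPF_F_PSEUDO_HDR.
 *	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip,
 *			    BPF_F_PSEUDO_HDR | sizeof(new_ip));
 *	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip,
 *			    sizeof(new_ip));
 *	bpf_skb_store_bytes(skb, IP_DST_OFF, &new_ip, sizeof(new_ip), 0);
 */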
1633 
1634 BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size,
1635            __be32 *, to, u32, to_size, __wsum, seed)
1636 {
1637         struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp);
1638         u32 diff_size = from_size + to_size;
1639         int i, j = 0;
1640 
1641         /* This is quite flexible, some examples:
1642          *
1643          * from_size == 0, to_size > 0,  seed := csum --> pushing data
1644          * from_size > 0,  to_size == 0, seed := csum --> pulling data
1645          * from_size > 0,  to_size > 0,  seed := 0    --> diffing data
1646          *
1647          * Even for diffing, from_size and to_size don't need to be equal.
1648          */
1649         if (unlikely(((from_size | to_size) & (sizeof(__be32) - 1)) ||
1650                      diff_size > sizeof(sp->diff)))
1651                 return -EINVAL;
1652 
1653         for (i = 0; i < from_size / sizeof(__be32); i++, j++)
1654                 sp->diff[j] = ~from[i];
1655         for (i = 0; i <   to_size / sizeof(__be32); i++, j++)
1656                 sp->diff[j] = to[i];
1657 
1658         return csum_partial(sp->diff, diff_size, seed);
1659 }
1660 
1661 static const struct bpf_func_proto bpf_csum_diff_proto = {
1662         .func           = bpf_csum_diff,
1663         .gpl_only       = false,
1664         .pkt_access     = true,
1665         .ret_type       = RET_INTEGER,
1666         .arg1_type      = ARG_PTR_TO_MEM_OR_NULL,
1667         .arg2_type      = ARG_CONST_SIZE_OR_ZERO,
1668         .arg3_type      = ARG_PTR_TO_MEM_OR_NULL,
1669         .arg4_type      = ARG_CONST_SIZE_OR_ZERO,
1670         .arg5_type      = ARG_ANYTHING,
1671 };
1672 
1673 BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum)
1674 {
1675         /* The interface is to be used in combination with bpf_csum_diff()
1676          * for direct packet writes. csum rotation for alignment as well
1677          * as emulating csum_sub() can be done from the eBPF program.
1678          */
1679         if (skb->ip_summed == CHECKSUM_COMPLETE)
1680                 return (skb->csum = csum_add(skb->csum, csum));
1681 
1682         return -ENOTSUPP;
1683 }
1684 
1685 static const struct bpf_func_proto bpf_csum_update_proto = {
1686         .func           = bpf_csum_update,
1687         .gpl_only       = false,
1688         .ret_type       = RET_INTEGER,
1689         .arg1_type      = ARG_PTR_TO_CTX,
1690         .arg2_type      = ARG_ANYTHING,
1691 };
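
/* Usage sketch (illustrative only, iph/old_words/new_words are program
 * variables): the "pushing data" mode of bpf_csum_diff() sums up a freshly
 * built header, while the "diffing" mode yields a delta that can be fed to
 * the size-0 ("from diff") mode of the csum replace helpers, or folded
 * into skb->csum via bpf_csum_update() on CHECKSUM_COMPLETE skbs:
 *
 *	// Checksum over a just-written 20-byte IPv4 header, seed 0.
 *	__wsum sum = bpf_csum_diff(NULL, 0, (__be32 *)&iph, sizeof(iph), 0);
 *
 *	// Delta between an old and a new 4-byte-aligned block.
 *	__wsum diff = bpf_csum_diff(old_words, sizeof(old_words),
 *				    new_words, sizeof(new_words), 0);
 */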
1692 
1693 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
1694 {
1695         return dev_forward_skb(dev, skb);
1696 }
1697 
1698 static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
1699                                       struct sk_buff *skb)
1700 {
1701         int ret = ____dev_forward_skb(dev, skb);
1702 
1703         if (likely(!ret)) {
1704                 skb->dev = dev;
1705                 ret = netif_rx(skb);
1706         }
1707 
1708         return ret;
1709 }
1710 
1711 static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
1712 {
1713         int ret;
1714 
1715         if (unlikely(__this_cpu_read(xmit_recursion) > XMIT_RECURSION_LIMIT)) {
1716                 net_crit_ratelimited("bpf: recursion limit reached on datapath, buggy bpf program?\n");
1717                 kfree_skb(skb);
1718                 return -ENETDOWN;
1719         }
1720 
1721         skb->dev = dev;
1722 
1723         __this_cpu_inc(xmit_recursion);
1724         ret = dev_queue_xmit(skb);
1725         __this_cpu_dec(xmit_recursion);
1726 
1727         return ret;
1728 }
1729 
1730 static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
1731                                  u32 flags)
1732 {
1733         /* skb->mac_len is not set on normal egress */
1734         unsigned int mlen = skb->network_header - skb->mac_header;
1735 
1736         __skb_pull(skb, mlen);
1737 
1738         /* At ingress, the mac header has already been pulled once.
1739          * At egress, skb_postpull_rcsum() has to be done in case
1740          * the skb originated from ingress (i.e. a forwarded skb)
1741          * to ensure that rcsum starts at net header.
1742          */
1743         if (!skb_at_tc_ingress(skb))
1744                 skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
1745         skb_pop_mac_header(skb);
1746         skb_reset_mac_len(skb);
1747         return flags & BPF_F_INGRESS ?
1748                __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
1749 }
1750 
1751 static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
1752                                  u32 flags)
1753 {
1754         /* Verify that a link layer header is carried */
1755         if (unlikely(skb->mac_header >= skb->network_header)) {
1756                 kfree_skb(skb);
1757                 return -ERANGE;
1758         }
1759 
1760         bpf_push_mac_rcsum(skb);
1761         return flags & BPF_F_INGRESS ?
1762                __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
1763 }
1764 
1765 static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev,
1766                           u32 flags)
1767 {
1768         if (dev_is_mac_header_xmit(dev))
1769                 return __bpf_redirect_common(skb, dev, flags);
1770         else
1771                 return __bpf_redirect_no_mac(skb, dev, flags);
1772 }
1773 
1774 BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags)
1775 {
1776         struct net_device *dev;
1777         struct sk_buff *clone;
1778         int ret;
1779 
1780         if (unlikely(flags & ~(BPF_F_INGRESS)))
1781                 return -EINVAL;
1782 
1783         dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
1784         if (unlikely(!dev))
1785                 return -EINVAL;
1786 
1787         clone = skb_clone(skb, GFP_ATOMIC);
1788         if (unlikely(!clone))
1789                 return -ENOMEM;
1790 
1791         /* For direct writes, we need to keep the invariant that the skbs
1792          * we're dealing with are uncloned. Should uncloning fail here, we
1793          * need to free the clone we just generated in order to be able to
1794          * unclone once again.
1795          */
1796         ret = bpf_try_make_head_writable(skb);
1797         if (unlikely(ret)) {
1798                 kfree_skb(clone);
1799                 return -ENOMEM;
1800         }
1801 
1802         return __bpf_redirect(clone, dev, flags);
1803 }
1804 
1805 static const struct bpf_func_proto bpf_clone_redirect_proto = {
1806         .func           = bpf_clone_redirect,
1807         .gpl_only       = false,
1808         .ret_type       = RET_INTEGER,
1809         .arg1_type      = ARG_PTR_TO_CTX,
1810         .arg2_type      = ARG_ANYTHING,
1811         .arg3_type      = ARG_ANYTHING,
1812 };
1813 
1814 struct redirect_info {
1815         u32 ifindex;
1816         u32 flags;
1817         struct bpf_map *map;
1818         struct bpf_map *map_to_flush;
1819         unsigned long   map_owner;
1820 };
1821 
1822 static DEFINE_PER_CPU(struct redirect_info, redirect_info);
1823 
1824 BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags)
1825 {
1826         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1827 
1828         if (unlikely(flags & ~(BPF_F_INGRESS)))
1829                 return TC_ACT_SHOT;
1830 
1831         ri->ifindex = ifindex;
1832         ri->flags = flags;
1833 
1834         return TC_ACT_REDIRECT;
1835 }
1836 
1837 int skb_do_redirect(struct sk_buff *skb)
1838 {
1839         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
1840         struct net_device *dev;
1841 
1842         dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
1843         ri->ifindex = 0;
1844         if (unlikely(!dev)) {
1845                 kfree_skb(skb);
1846                 return -EINVAL;
1847         }
1848 
1849         return __bpf_redirect(skb, dev, ri->flags);
1850 }
1851 
1852 static const struct bpf_func_proto bpf_redirect_proto = {
1853         .func           = bpf_redirect,
1854         .gpl_only       = false,
1855         .ret_type       = RET_INTEGER,
1856         .arg1_type      = ARG_ANYTHING,
1857         .arg2_type      = ARG_ANYTHING,
1858 };
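
/* Usage sketch (illustrative only, ifindex 2 is an example): from a tc
 * classifier, bpf_redirect() merely records ifindex/flags in the per-CPU
 * redirect_info above and returns TC_ACT_REDIRECT; the actual forwarding
 * is done when the tc action code calls skb_do_redirect() afterwards.
 * bpf_clone_redirect() is the immediate, copying variant.
 *
 *	SEC("classifier")
 *	int cls_redirect(struct __sk_buff *skb)
 *	{
 *		// Egress on ifindex 2; BPF_F_INGRESS would inject the skb
 *		// into that device's ingress path instead.
 *		return bpf_redirect(2, 0);
 *	}
 */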
1859 
1860 BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb,
1861            struct bpf_map *, map, u32, key, u64, flags)
1862 {
1863         struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1864 
1865         /* If the user passes invalid input, drop the packet. */
1866         if (unlikely(flags & ~(BPF_F_INGRESS)))
1867                 return SK_DROP;
1868 
1869         tcb->bpf.key = key;
1870         tcb->bpf.flags = flags;
1871         tcb->bpf.map = map;
1872 
1873         return SK_PASS;
1874 }
1875 
1876 struct sock *do_sk_redirect_map(struct sk_buff *skb)
1877 {
1878         struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
1879         struct sock *sk = NULL;
1880 
1881         if (tcb->bpf.map) {
1882                 sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key);
1883 
1884                 tcb->bpf.key = 0;
1885                 tcb->bpf.map = NULL;
1886         }
1887 
1888         return sk;
1889 }
1890 
1891 static const struct bpf_func_proto bpf_sk_redirect_map_proto = {
1892         .func           = bpf_sk_redirect_map,
1893         .gpl_only       = false,
1894         .ret_type       = RET_INTEGER,
1895         .arg1_type      = ARG_PTR_TO_CTX,
1896         .arg2_type      = ARG_CONST_MAP_PTR,
1897         .arg3_type      = ARG_ANYTHING,
1898         .arg4_type      = ARG_ANYTHING,
1899 };
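
/* Usage sketch (illustrative only, map layout and key are examples): an
 * SK_SKB verdict program attached to a sockmap typically finishes with
 * bpf_sk_redirect_map() to steer the skb to another socket in the map:
 *
 *	struct bpf_map_def SEC("maps") sock_map = {
 *		.type		= BPF_MAP_TYPE_SOCKMAP,
 *		.key_size	= sizeof(int),
 *		.value_size	= sizeof(int),
 *		.max_entries	= 2,
 *	};
 *
 *	SEC("sk_skb")
 *	int skb_verdict(struct __sk_buff *skb)
 *	{
 *		// Steer every skb to the socket stored at index 0.
 *		return bpf_sk_redirect_map(skb, &sock_map, 0, 0);
 *	}
 */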
1900 
1901 BPF_CALL_4(bpf_msg_redirect_map, struct sk_msg_buff *, msg,
1902            struct bpf_map *, map, u32, key, u64, flags)
1903 {
1904         /* If the user passes invalid input, drop the packet. */
1905         if (unlikely(flags & ~(BPF_F_INGRESS)))
1906                 return SK_DROP;
1907 
1908         msg->key = key;
1909         msg->flags = flags;
1910         msg->map = map;
1911 
1912         return SK_PASS;
1913 }
1914 
1915 struct sock *do_msg_redirect_map(struct sk_msg_buff *msg)
1916 {
1917         struct sock *sk = NULL;
1918 
1919         if (msg->map) {
1920                 sk = __sock_map_lookup_elem(msg->map, msg->key);
1921 
1922                 msg->key = 0;
1923                 msg->map = NULL;
1924         }
1925 
1926         return sk;
1927 }
1928 
1929 static const struct bpf_func_proto bpf_msg_redirect_map_proto = {
1930         .func           = bpf_msg_redirect_map,
1931         .gpl_only       = false,
1932         .ret_type       = RET_INTEGER,
1933         .arg1_type      = ARG_PTR_TO_CTX,
1934         .arg2_type      = ARG_CONST_MAP_PTR,
1935         .arg3_type      = ARG_ANYTHING,
1936         .arg4_type      = ARG_ANYTHING,
1937 };
1938 
1939 BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg_buff *, msg, u32, bytes)
1940 {
1941         msg->apply_bytes = bytes;
1942         return 0;
1943 }
1944 
1945 static const struct bpf_func_proto bpf_msg_apply_bytes_proto = {
1946         .func           = bpf_msg_apply_bytes,
1947         .gpl_only       = false,
1948         .ret_type       = RET_INTEGER,
1949         .arg1_type      = ARG_PTR_TO_CTX,
1950         .arg2_type      = ARG_ANYTHING,
1951 };
1952 
1953 BPF_CALL_2(bpf_msg_cork_bytes, struct sk_msg_buff *, msg, u32, bytes)
1954 {
1955         msg->cork_bytes = bytes;
1956         return 0;
1957 }
1958 
1959 static const struct bpf_func_proto bpf_msg_cork_bytes_proto = {
1960         .func           = bpf_msg_cork_bytes,
1961         .gpl_only       = false,
1962         .ret_type       = RET_INTEGER,
1963         .arg1_type      = ARG_PTR_TO_CTX,
1964         .arg2_type      = ARG_ANYTHING,
1965 };
1966 
1967 BPF_CALL_4(bpf_msg_pull_data,
1968            struct sk_msg_buff *, msg, u32, start, u32, end, u64, flags)
1969 {
1970         unsigned int len = 0, offset = 0, copy = 0;
1971         struct scatterlist *sg = msg->sg_data;
1972         int first_sg, last_sg, i, shift;
1973         unsigned char *p, *to, *from;
1974         int bytes = end - start;
1975         struct page *page;
1976 
1977         if (unlikely(flags || end <= start))
1978                 return -EINVAL;
1979 
1980         /* First find the starting scatterlist element */
1981         i = msg->sg_start;
1982         do {
1983                 len = sg[i].length;
1984                 if (start < offset + len)
1985                         break;
1986                 offset += len;
1987                 i++;
1988                 if (i == MAX_SKB_FRAGS)
1989                         i = 0;
1990         } while (i != msg->sg_end);
1991 
1992         if (unlikely(start >= offset + len))
1993                 return -EINVAL;
1994 
1995         if (!msg->sg_copy[i] && bytes <= len)
1996                 goto out;
1997 
1998         first_sg = i;
1999 
2000         /* At this point we need to linearize multiple scatterlist
2001          * elements or a single shared page. Either way we need to
2002          * copy into a linear buffer exclusively owned by BPF. Then
2003          * place the buffer in the scatterlist and fixup the original
2004          * entries by removing the entries now in the linear buffer
2005          * and shifting the remaining entries. For now we do not try
2006          * to copy partial entries to avoid complexity of running out
2007          * of sg_entry slots. The downside is reading a single byte
2008          * will copy the entire sg entry.
2009          */
2010         do {
2011                 copy += sg[i].length;
2012                 i++;
2013                 if (i == MAX_SKB_FRAGS)
2014                         i = 0;
2015                 if (bytes < copy)
2016                         break;
2017         } while (i != msg->sg_end);
2018         last_sg = i;
2019 
2020         if (unlikely(copy < end - start))
2021                 return -EINVAL;
2022 
2023         page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC, get_order(copy));
2024         if (unlikely(!page))
2025                 return -ENOMEM;
2026         p = page_address(page);
2027         offset = 0;
2028 
2029         i = first_sg;
2030         do {
2031                 from = sg_virt(&sg[i]);
2032                 len = sg[i].length;
2033                 to = p + offset;
2034 
2035                 memcpy(to, from, len);
2036                 offset += len;
2037                 sg[i].length = 0;
2038                 put_page(sg_page(&sg[i]));
2039 
2040                 i++;
2041                 if (i == MAX_SKB_FRAGS)
2042                         i = 0;
2043         } while (i != last_sg);
2044 
2045         sg[first_sg].length = copy;
2046         sg_set_page(&sg[first_sg], page, copy, 0);
2047 
2048         /* To repair sg ring we need to shift entries. If we only
2049          * had a single entry though we can just replace it and
2050          * be done. Otherwise walk the ring and shift the entries.
2051          */
2052         shift = last_sg - first_sg - 1;
2053         if (!shift)
2054                 goto out;
2055 
2056         i = first_sg + 1;
2057         do {
2058                 int move_from;
2059 
2060                 if (i + shift >= MAX_SKB_FRAGS)
2061                         move_from = i + shift - MAX_SKB_FRAGS;
2062                 else
2063                         move_from = i + shift;
2064 
2065                 if (move_from == msg->sg_end)
2066                         break;
2067 
2068                 sg[i] = sg[move_from];
2069                 sg[move_from].length = 0;
2070                 sg[move_from].page_link = 0;
2071                 sg[move_from].offset = 0;
2072 
2073                 i++;
2074                 if (i == MAX_SKB_FRAGS)
2075                         i = 0;
2076         } while (1);
2077         msg->sg_end -= shift;
2078         if (msg->sg_end < 0)
2079                 msg->sg_end += MAX_SKB_FRAGS;
2080 out:
2081         msg->data = sg_virt(&sg[i]) + start - offset;
2082         msg->data_end = msg->data + bytes;
2083 
2084         return 0;
2085 }
2086 
2087 static const struct bpf_func_proto bpf_msg_pull_data_proto = {
2088         .func           = bpf_msg_pull_data,
2089         .gpl_only       = false,
2090         .ret_type       = RET_INTEGER,
2091         .arg1_type      = ARG_PTR_TO_CTX,
2092         .arg2_type      = ARG_ANYTHING,
2093         .arg3_type      = ARG_ANYTHING,
2094         .arg4_type      = ARG_ANYTHING,
2095 };
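
/* Usage sketch (illustrative only, sizes are examples): an SK_MSG program
 * sees scatterlist-backed data, so before parsing it typically requests a
 * linear, BPF-owned view of the bytes it needs via bpf_msg_pull_data() as
 * described in the comment above, then re-derives data/data_end:
 *
 *	SEC("sk_msg")
 *	int msg_prog(struct sk_msg_md *msg)
 *	{
 *		void *data, *data_end;
 *
 *		if (bpf_msg_pull_data(msg, 0, 8, 0))
 *			return SK_PASS;
 *
 *		data	 = (void *)(long)msg->data;
 *		data_end = (void *)(long)msg->data_end;
 *		if (data + 8 > data_end)
 *			return SK_PASS;
 *
 *		// ... inspect the first 8 bytes, e.g. to pick a verdict ...
 *		return SK_PASS;
 *	}
 */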
2096 
2097 BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb)
2098 {
2099         return task_get_classid(skb);
2100 }
2101 
2102 static const struct bpf_func_proto bpf_get_cgroup_classid_proto = {
2103         .func           = bpf_get_cgroup_classid,
2104         .gpl_only       = false,
2105         .ret_type       = RET_INTEGER,
2106         .arg1_type      = ARG_PTR_TO_CTX,
2107 };
2108 
2109 BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb)
2110 {
2111         return dst_tclassid(skb);
2112 }
2113 
2114 static const struct bpf_func_proto bpf_get_route_realm_proto = {
2115         .func           = bpf_get_route_realm,
2116         .gpl_only       = false,
2117         .ret_type       = RET_INTEGER,
2118         .arg1_type      = ARG_PTR_TO_CTX,
2119 };
2120 
2121 BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb)
2122 {
2123         /* If skb_clear_hash() was called due to mangling, we can
2124          * trigger SW recalculation here. Later access to hash
2125          * can then use the inline skb->hash via context directly
2126          * instead of calling this helper again.
2127          */
2128         return skb_get_hash(skb);
2129 }
2130 
2131 static const struct bpf_func_proto bpf_get_hash_recalc_proto = {
2132         .func           = bpf_get_hash_recalc,
2133         .gpl_only       = false,
2134         .ret_type       = RET_INTEGER,
2135         .arg1_type      = ARG_PTR_TO_CTX,
2136 };
2137 
2138 BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb)
2139 {
2140         /* After all direct packet writes, this can be used once to
2141          * trigger a lazy recalc on the next skb_get_hash() invocation.
2142          */
2143         skb_clear_hash(skb);
2144         return 0;
2145 }
2146 
2147 static const struct bpf_func_proto bpf_set_hash_invalid_proto = {
2148         .func           = bpf_set_hash_invalid,
2149         .gpl_only       = false,
2150         .ret_type       = RET_INTEGER,
2151         .arg1_type      = ARG_PTR_TO_CTX,
2152 };
2153 
2154 BPF_CALL_2(bpf_set_hash, struct sk_buff *, skb, u32, hash)
2155 {
2156         /* Set the user-specified hash as L4(+), so that it gets returned
2157          * on a skb_get_hash() call unless the BPF program later triggers
2158          * a skb_clear_hash().
2159          */
2160         __skb_set_sw_hash(skb, hash, true);
2161         return 0;
2162 }
2163 
2164 static const struct bpf_func_proto bpf_set_hash_proto = {
2165         .func           = bpf_set_hash,
2166         .gpl_only       = false,
2167         .ret_type       = RET_INTEGER,
2168         .arg1_type      = ARG_PTR_TO_CTX,
2169         .arg2_type      = ARG_ANYTHING,
2170 };
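
/* Usage sketch (illustrative only, my_hash is a value computed by the
 * program): the three hash helpers above compose as their comments
 * describe -- after mangling the packet, a program can either invalidate
 * the stale hash and have it lazily recomputed, force a recalculation
 * right away, or install its own value:
 *
 *	bpf_set_hash_invalid(skb);		// lazy recalc on next use
 *	hash = bpf_get_hash_recalc(skb);	// recalc now and read it
 *	bpf_set_hash(skb, my_hash);		// use a precomputed L4 hash
 */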
2171 
2172 BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto,
2173            u16, vlan_tci)
2174 {
2175         int ret;
2176 
2177         if (unlikely(vlan_proto != htons(ETH_P_8021Q) &&
2178                      vlan_proto != htons(ETH_P_8021AD)))
2179                 vlan_proto = htons(ETH_P_8021Q);
2180 
2181         bpf_push_mac_rcsum(skb);
2182         ret = skb_vlan_push(skb, vlan_proto, vlan_tci);
2183         bpf_pull_mac_rcsum(skb);
2184 
2185         bpf_compute_data_pointers(skb);
2186         return ret;
2187 }
2188 
2189 const struct bpf_func_proto bpf_skb_vlan_push_proto = {
2190         .func           = bpf_skb_vlan_push,
2191         .gpl_only       = false,
2192         .ret_type       = RET_INTEGER,
2193         .arg1_type      = ARG_PTR_TO_CTX,
2194         .arg2_type      = ARG_ANYTHING,
2195         .arg3_type      = ARG_ANYTHING,
2196 };
2197 EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto);
2198 
2199 BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb)
2200 {
2201         int ret;
2202 
2203         bpf_push_mac_rcsum(skb);
2204         ret = skb_vlan_pop(skb);
2205         bpf_pull_mac_rcsum(skb);
2206 
2207         bpf_compute_data_pointers(skb);
2208         return ret;
2209 }
2210 
2211 const struct bpf_func_proto bpf_skb_vlan_pop_proto = {
2212         .func           = bpf_skb_vlan_pop,
2213         .gpl_only       = false,
2214         .ret_type       = RET_INTEGER,
2215         .arg1_type      = ARG_PTR_TO_CTX,
2216 };
2217 EXPORT_SYMBOL_GPL(bpf_skb_vlan_pop_proto);
2218 
2219 static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
2220 {
2221         /* Caller already did skb_cow() with len as headroom,
2222          * so no need to do it here.
2223          */
2224         skb_push(skb, len);
2225         memmove(skb->data, skb->data + len, off);
2226         memset(skb->data + off, 0, len);
2227 
2228         /* No skb_postpush_rcsum(skb, skb->data + off, len)
2229          * needed here as it does not change the skb->csum
2230          * result for checksum complete when summing over
2231          * zeroed blocks.
2232          */
2233         return 0;
2234 }
2235 
2236 static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
2237 {
2238         /* skb_ensure_writable() is not needed here, as we're
2239          * already working on an uncloned skb.
2240          */
2241         if (unlikely(!pskb_may_pull(skb, off + len)))
2242                 return -ENOMEM;
2243 
2244         skb_postpull_rcsum(skb, skb->data + off, len);
2245         memmove(skb->data + len, skb->data, off);
2246         __skb_pull(skb, len);
2247 
2248         return 0;
2249 }
2250 
2251 static int bpf_skb_net_hdr_push(struct sk_buff *skb, u32 off, u32 len)
2252 {
2253         bool trans_same = skb->transport_header == skb->network_header;
2254         int ret;
2255 
2256         /* There's no need for __skb_push()/__skb_pull() pair to
2257          * get to the start of the mac header as we're guaranteed
2258          * to always start from here under eBPF.
2259          */
2260         ret = bpf_skb_generic_push(skb, off, len);
2261         if (likely(!ret)) {
2262                 skb->mac_header -= len;
2263                 skb->network_header -= len;
2264                 if (trans_same)
2265                         skb->transport_header = skb->network_header;
2266         }
2267 
2268         return ret;
2269 }
2270 
2271 static int bpf_skb_net_hdr_pop(struct sk_buff *skb, u32 off, u32 len)
2272 {
2273         bool trans_same = skb->transport_header == skb->network_header;
2274         int ret;
2275 
2276         /* Same here, __skb_push()/__skb_pull() pair not needed. */
2277         ret = bpf_skb_generic_pop(skb, off, len);
2278         if (likely(!ret)) {
2279                 skb->mac_header += len;
2280                 skb->network_header += len;
2281                 if (trans_same)
2282                         skb->transport_header = skb->network_header;
2283         }
2284 
2285         return ret;
2286 }
2287 
2288 static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
2289 {
2290         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2291         u32 off = skb_mac_header_len(skb);
2292         int ret;
2293 
2294         /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2295         if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2296                 return -ENOTSUPP;
2297 
2298         ret = skb_cow(skb, len_diff);
2299         if (unlikely(ret < 0))
2300                 return ret;
2301 
2302         ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2303         if (unlikely(ret < 0))
2304                 return ret;
2305 
2306         if (skb_is_gso(skb)) {
2307                 struct skb_shared_info *shinfo = skb_shinfo(skb);
2308 
2309                 /* SKB_GSO_TCPV4 needs to be changed into
2310                  * SKB_GSO_TCPV6.
2311                  */
2312                 if (shinfo->gso_type & SKB_GSO_TCPV4) {
2313                         shinfo->gso_type &= ~SKB_GSO_TCPV4;
2314                         shinfo->gso_type |=  SKB_GSO_TCPV6;
2315                 }
2316 
2317                 /* Due to IPv6 header, MSS needs to be downgraded. */
2318                 skb_decrease_gso_size(shinfo, len_diff);
2319                 /* Header must be checked, and gso_segs recomputed. */
2320                 shinfo->gso_type |= SKB_GSO_DODGY;
2321                 shinfo->gso_segs = 0;
2322         }
2323 
2324         skb->protocol = htons(ETH_P_IPV6);
2325         skb_clear_hash(skb);
2326 
2327         return 0;
2328 }
2329 
2330 static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
2331 {
2332         const u32 len_diff = sizeof(struct ipv6hdr) - sizeof(struct iphdr);
2333         u32 off = skb_mac_header_len(skb);
2334         int ret;
2335 
2336         /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2337         if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2338                 return -ENOTSUPP;
2339 
2340         ret = skb_unclone(skb, GFP_ATOMIC);
2341         if (unlikely(ret < 0))
2342                 return ret;
2343 
2344         ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2345         if (unlikely(ret < 0))
2346                 return ret;
2347 
2348         if (skb_is_gso(skb)) {
2349                 struct skb_shared_info *shinfo = skb_shinfo(skb);
2350 
2351                 /* SKB_GSO_TCPV6 needs to be changed into
2352                  * SKB_GSO_TCPV4.
2353                  */
2354                 if (shinfo->gso_type & SKB_GSO_TCPV6) {
2355                         shinfo->gso_type &= ~SKB_GSO_TCPV6;
2356                         shinfo->gso_type |=  SKB_GSO_TCPV4;
2357                 }
2358 
2359                 /* Due to IPv4 header, MSS can be upgraded. */
2360                 skb_increase_gso_size(shinfo, len_diff);
2361                 /* Header must be checked, and gso_segs recomputed. */
2362                 shinfo->gso_type |= SKB_GSO_DODGY;
2363                 shinfo->gso_segs = 0;
2364         }
2365 
2366         skb->protocol = htons(ETH_P_IP);
2367         skb_clear_hash(skb);
2368 
2369         return 0;
2370 }
2371 
2372 static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto)
2373 {
2374         __be16 from_proto = skb->protocol;
2375 
2376         if (from_proto == htons(ETH_P_IP) &&
2377               to_proto == htons(ETH_P_IPV6))
2378                 return bpf_skb_proto_4_to_6(skb);
2379 
2380         if (from_proto == htons(ETH_P_IPV6) &&
2381               to_proto == htons(ETH_P_IP))
2382                 return bpf_skb_proto_6_to_4(skb);
2383 
2384         return -ENOTSUPP;
2385 }
2386 
2387 BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto,
2388            u64, flags)
2389 {
2390         int ret;
2391 
2392         if (unlikely(flags))
2393                 return -EINVAL;
2394 
2395         /* The general idea is that this helper does the basic groundwork
2396          * needed for changing the protocol, and the eBPF program fills in
2397          * the rest through bpf_skb_store_bytes(), bpf_lX_csum_replace()
2398          * and other helpers, rather than having a raw buffer passed here.
2399          *
2400          * The rationale is to keep this minimal and avoid the need to
2401          * deal with raw packet data. E.g. even if we passed buffers here,
2402          * the program would still need to call the bpf_lX_csum_replace()
2403          * helpers anyway. Plus, this way we also keep a separation of
2404          * concerns, since e.g. bpf_skb_store_bytes() should only take
2405          * care of stores.
2406          *
2407          * Currently, additional options and extension header space are
2408          * not supported, but the flags argument is reserved so we can
2409          * adapt that later. For offloads, we mark the packet as dodgy so
2410          * that the headers need to be verified first.
2411          */
2412         ret = bpf_skb_proto_xlat(skb, proto);
2413         bpf_compute_data_pointers(skb);
2414         return ret;
2415 }
2416 
2417 static const struct bpf_func_proto bpf_skb_change_proto_proto = {
2418         .func           = bpf_skb_change_proto,
2419         .gpl_only       = false,
2420         .ret_type       = RET_INTEGER,
2421         .arg1_type      = ARG_PTR_TO_CTX,
2422         .arg2_type      = ARG_ANYTHING,
2423         .arg3_type      = ARG_ANYTHING,
2424 };
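
/* Usage sketch (illustrative only; proto_be16 and new_iph are values the
 * program builds, offsets assume Ethernet framing): as the comment above
 * says, bpf_skb_change_proto() only resizes and retags the skb, so e.g. a
 * v6-to-v4 translation still has to rewrite the headers itself:
 *
 *	if (bpf_skb_change_proto(skb, __constant_htons(ETH_P_IP), 0))
 *		return TC_ACT_SHOT;
 *	// Fix up the Ethernet type field and write the new IPv4 header.
 *	bpf_skb_store_bytes(skb, offsetof(struct ethhdr, h_proto),
 *			    &proto_be16, sizeof(proto_be16), 0);
 *	bpf_skb_store_bytes(skb, ETH_HLEN, &new_iph, sizeof(new_iph),
 *			    BPF_F_RECOMPUTE_CSUM);
 */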
2425 
2426 BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type)
2427 {
2428         /* We only allow a restricted subset to be changed for now. */
2429         if (unlikely(!skb_pkt_type_ok(skb->pkt_type) ||
2430                      !skb_pkt_type_ok(pkt_type)))
2431                 return -EINVAL;
2432 
2433         skb->pkt_type = pkt_type;
2434         return 0;
2435 }
2436 
2437 static const struct bpf_func_proto bpf_skb_change_type_proto = {
2438         .func           = bpf_skb_change_type,
2439         .gpl_only       = false,
2440         .ret_type       = RET_INTEGER,
2441         .arg1_type      = ARG_PTR_TO_CTX,
2442         .arg2_type      = ARG_ANYTHING,
2443 };
2444 
2445 static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
2446 {
2447         switch (skb->protocol) {
2448         case htons(ETH_P_IP):
2449                 return sizeof(struct iphdr);
2450         case htons(ETH_P_IPV6):
2451                 return sizeof(struct ipv6hdr);
2452         default:
2453                 return ~0U;
2454         }
2455 }
2456 
2457 static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
2458 {
2459         u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2460         int ret;
2461 
2462         /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2463         if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2464                 return -ENOTSUPP;
2465 
2466         ret = skb_cow(skb, len_diff);
2467         if (unlikely(ret < 0))
2468                 return ret;
2469 
2470         ret = bpf_skb_net_hdr_push(skb, off, len_diff);
2471         if (unlikely(ret < 0))
2472                 return ret;
2473 
2474         if (skb_is_gso(skb)) {
2475                 struct skb_shared_info *shinfo = skb_shinfo(skb);
2476 
2477                 /* Due to header grow, MSS needs to be downgraded. */
2478                 skb_decrease_gso_size(shinfo, len_diff);
2479                 /* Header must be checked, and gso_segs recomputed. */
2480                 shinfo->gso_type |= SKB_GSO_DODGY;
2481                 shinfo->gso_segs = 0;
2482         }
2483 
2484         return 0;
2485 }
2486 
2487 static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
2488 {
2489         u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
2490         int ret;
2491 
2492         /* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
2493         if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
2494                 return -ENOTSUPP;
2495 
2496         ret = skb_unclone(skb, GFP_ATOMIC);
2497         if (unlikely(ret < 0))
2498                 return ret;
2499 
2500         ret = bpf_skb_net_hdr_pop(skb, off, len_diff);
2501         if (unlikely(ret < 0))
2502                 return ret;
2503 
2504         if (skb_is_gso(skb)) {
2505                 struct skb_shared_info *shinfo = skb_shinfo(skb);
2506 
2507                 /* Due to header shrink, MSS can be upgraded. */
2508                 skb_increase_gso_size(shinfo, len_diff);
2509                 /* Header must be checked, and gso_segs recomputed. */
2510                 shinfo->gso_type |= SKB_GSO_DODGY;
2511                 shinfo->gso_segs = 0;
2512         }
2513 
2514         return 0;
2515 }
2516 
2517 static u32 __bpf_skb_max_len(const struct sk_buff *skb)
2518 {
2519         return skb->dev ? skb->dev->mtu + skb->dev->hard_header_len :
2520                           SKB_MAX_ALLOC;
2521 }
2522 
2523 static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff)
2524 {
2525         bool trans_same = skb->transport_header == skb->network_header;
2526         u32 len_cur, len_diff_abs = abs(len_diff);
2527         u32 len_min = bpf_skb_net_base_len(skb);
2528         u32 len_max = __bpf_skb_max_len(skb);
2529         __be16 proto = skb->protocol;
2530         bool shrink = len_diff < 0;
2531         int ret;
2532 
2533         if (unlikely(len_diff_abs > 0xfffU))
2534                 return -EFAULT;
2535         if (unlikely(proto != htons(ETH_P_IP) &&
2536                      proto != htons(ETH_P_IPV6)))
2537                 return -ENOTSUPP;
2538 
2539         len_cur = skb->len - skb_network_offset(skb);
2540         if (skb_transport_header_was_set(skb) && !trans_same)
2541                 len_cur = skb_network_header_len(skb);
2542         if ((shrink && (len_diff_abs >= len_cur ||
2543                         len_cur - len_diff_abs < len_min)) ||
2544             (!shrink && (skb->len + len_diff_abs > len_max &&
2545                          !skb_is_gso(skb))))
2546                 return -ENOTSUPP;
2547 
2548         ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) :
2549                        bpf_skb_net_grow(skb, len_diff_abs);
2550 
2551         bpf_compute_data_pointers(skb);
2552         return ret;
2553 }
2554 
2555 BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
2556            u32, mode, u64, flags)
2557 {
2558         if (unlikely(flags))
2559                 return -EINVAL;
2560         if (likely(mode == BPF_ADJ_ROOM_NET))
2561                 return bpf_skb_adjust_net(skb, len_diff);
2562 
2563         return -ENOTSUPP;
2564 }
2565 
2566 static const struct bpf_func_proto bpf_skb_adjust_room_proto = {
2567         .func           = bpf_skb_adjust_room,
2568         .gpl_only       = false,
2569         .ret_type       = RET_INTEGER,
2570         .arg1_type      = ARG_PTR_TO_CTX,
2571         .arg2_type      = ARG_ANYTHING,
2572         .arg3_type      = ARG_ANYTHING,
2573         .arg4_type      = ARG_ANYTHING,
2574 };
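
/* Usage sketch (illustrative only, 8 bytes is an example size): with
 * BPF_ADJ_ROOM_NET, a positive len_diff opens that much zeroed room right
 * behind the base network header (for the program to fill afterwards,
 * e.g. via bpf_skb_store_bytes()), and a negative len_diff removes the
 * same amount from that spot:
 *
 *	if (bpf_skb_adjust_room(skb, 8, BPF_ADJ_ROOM_NET, 0))
 *		return TC_ACT_SHOT;
 */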
2575 
2576 static u32 __bpf_skb_min_len(const struct sk_buff *skb)
2577 {
2578         u32 min_len = skb_network_offset(skb);
2579 
2580         if (skb_transport_header_was_set(skb))
2581                 min_len = skb_transport_offset(skb);
2582         if (skb->ip_summed == CHECKSUM_PARTIAL)
2583                 min_len = skb_checksum_start_offset(skb) +
2584                           skb->csum_offset + sizeof(__sum16);
2585         return min_len;
2586 }
2587 
2588 static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len)
2589 {
2590         unsigned int old_len = skb->len;
2591         int ret;
2592 
2593         ret = __skb_grow_rcsum(skb, new_len);
2594         if (!ret)
2595                 memset(skb->data + old_len, 0, new_len - old_len);
2596         return ret;
2597 }
2598 
2599 static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len)
2600 {
2601         return __skb_trim_rcsum(skb, new_len);
2602 }
2603 
2604 BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len,
2605            u64, flags)
2606 {
2607         u32 max_len = __bpf_skb_max_len(skb);
2608         u32 min_len = __bpf_skb_min_len(skb);
2609         int ret;
2610 
2611         if (unlikely(flags || new_len > max_len || new_len < min_len))
2612                 return -EINVAL;
2613         if (skb->encapsulation)
2614                 return -ENOTSUPP;
2615 
2616         /* The basic idea of this helper is that it performs the work
2617          * needed to either grow or trim an skb, and the eBPF program
2618          * rewrites the rest via helpers like bpf_skb_store_bytes(),
2619          * bpf_lX_csum_replace() and others rather than having a raw
2620          * buffer passed here. This is a slow path helper, intended
2621          * for replies with control messages.
2622          *
2623          * As in bpf_skb_change_proto(), we want to keep this rather
2624          * minimal and free of protocol specifics so that concerns stay
2625          * separated, e.g. bpf_skb_store_bytes() should remain the only
2626          * helper responsible for writing buffers.
2627          *
2628          * This is really expected to be a slow path operation for
2629          * control message replies, so by doing this we implicitly
2630          * linearize, unclone and drop offloads from the skb.
2631          */
2632         ret = __bpf_try_make_writable(skb, skb->len);
2633         if (!ret) {
2634                 if (new_len > skb->len)
2635                         ret = bpf_skb_grow_rcsum(skb, new_len);
2636                 else if (new_len < skb->len)
2637                         ret = bpf_skb_trim_rcsum(skb, new_len);
2638                 if (!ret && skb_is_gso(skb))
2639                         skb_gso_reset(skb);
2640         }
2641 
2642         bpf_compute_data_pointers(skb);
2643         return ret;
2644 }
2645 
2646 static const struct bpf_func_proto bpf_skb_change_tail_proto = {
2647         .func           = bpf_skb_change_tail,
2648         .gpl_only       = false,
2649         .ret_type       = RET_INTEGER,
2650         .arg1_type      = ARG_PTR_TO_CTX,
2651         .arg2_type      = ARG_ANYTHING,
2652         .arg3_type      = ARG_ANYTHING,
2653 };
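
/* Usage sketch (illustrative only, 64 bytes is an example size): as the
 * comment above describes, this is a slow-path helper for control message
 * replies, e.g. trimming a request down before rewriting it in place into
 * a short answer:
 *
 *	if (bpf_skb_change_tail(skb, 64, 0))
 *		return TC_ACT_SHOT;
 *	// ... rewrite headers/payload via bpf_skb_store_bytes() etc. ...
 */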
2654 
2655 BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room,
2656            u64, flags)
2657 {
2658         u32 max_len = __bpf_skb_max_len(skb);
2659         u32 new_len = skb->len + head_room;
2660         int ret;
2661 
2662         if (unlikely(flags || (!skb_is_gso(skb) && new_len > max_len) ||
2663                      new_len < skb->len))
2664                 return -EINVAL;
2665 
2666         ret = skb_cow(skb, head_room);
2667         if (likely(!ret)) {
2668                 /* The idea for this helper is that we currently only
2669                  * allow expanding the mac header. This means that
2670                  * skb->protocol, the network header, etc. stay as is.
2671                  * Compared to bpf_skb_change_tail(), we're more
2672                  * flexible since we don't need to linearize or
2673                  * reset GSO. The intention is for this helper to be
2674                  * used by an L3 skb that needs to push a mac header
2675                  * for redirection into an L2 device.
2676                  */
2677                 __skb_push(skb, head_room);
2678                 memset(skb->data, 0, head_room);
2679                 skb_reset_mac_header(skb);
2680         }
2681 
2682         bpf_compute_data_pointers(skb);
2683         return ret;
2684 }
2685 
2686 static const struct bpf_func_proto bpf_skb_change_head_proto = {
2687         .func           = bpf_skb_change_head,
2688         .gpl_only       = false,
2689         .ret_type       = RET_INTEGER,
2690         .arg1_type      = ARG_PTR_TO_CTX,
2691         .arg2_type      = ARG_ANYTHING,
2692         .arg3_type      = ARG_ANYTHING,
2693 };
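
/* Usage sketch (illustrative only; eth is an Ethernet header built by the
 * program, IFINDEX an example target): matching the intention stated in
 * the comment above, an lwt xmit program handling L3 skbs can push room
 * for a mac header and then redirect into an L2 device:
 *
 *	if (bpf_skb_change_head(skb, ETH_HLEN, 0))
 *		return BPF_DROP;
 *	bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0);
 *	return bpf_redirect(IFINDEX, 0);
 */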
2694 
2695 static unsigned long xdp_get_metalen(const struct xdp_buff *xdp)
2696 {
2697         return xdp_data_meta_unsupported(xdp) ? 0 :
2698                xdp->data - xdp->data_meta;
2699 }
2700 
2701 BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
2702 {
2703         unsigned long metalen = xdp_get_metalen(xdp);
2704         void *data_start = xdp->data_hard_start + metalen;
2705         void *data = xdp->data + offset;
2706 
2707         if (unlikely(data < data_start ||
2708                      data > xdp->data_end - ETH_HLEN))
2709                 return -EINVAL;
2710 
2711         if (metalen)
2712                 memmove(xdp->data_meta + offset,
2713                         xdp->data_meta, metalen);
2714         xdp->data_meta += offset;
2715         xdp->data = data;
2716 
2717         return 0;
2718 }
2719 
2720 static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
2721         .func           = bpf_xdp_adjust_head,
2722         .gpl_only       = false,
2723         .ret_type       = RET_INTEGER,
2724         .arg1_type      = ARG_PTR_TO_CTX,
2725         .arg2_type      = ARG_ANYTHING,
2726 };
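
/* Usage sketch (illustrative only): a negative offset grows headroom, e.g.
 * to prepend an encapsulation header, while a positive one consumes bytes
 * from the front of the frame; at least an Ethernet header must remain and
 * data/data_end have to be re-derived afterwards:
 *
 *	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
 *		return XDP_DROP;
 *	data	 = (void *)(long)xdp->data;
 *	data_end = (void *)(long)xdp->data_end;
 *	// ... move the Ethernet header to the new start, fill the outer
 *	//     IPv4 header, re-checking bounds against data_end ...
 */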
2727 
2728 BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset)
2729 {
2730         void *meta = xdp->data_meta + offset;
2731         unsigned long metalen = xdp->data - meta;
2732 
2733         if (xdp_data_meta_unsupported(xdp))
2734                 return -ENOTSUPP;
2735         if (unlikely(meta < xdp->data_hard_start ||
2736                      meta > xdp->data))
2737                 return -EINVAL;
2738         if (unlikely((metalen & (sizeof(__u32) - 1)) ||
2739                      (metalen > 32)))
2740                 return -EACCES;
2741 
2742         xdp->data_meta = meta;
2743 
2744         return 0;
2745 }
2746 
2747 static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
2748         .func           = bpf_xdp_adjust_meta,
2749         .gpl_only       = false,
2750         .ret_type       = RET_INTEGER,
2751         .arg1_type      = ARG_PTR_TO_CTX,
2752         .arg2_type      = ARG_ANYTHING,
2753 };
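
/* Usage sketch (illustrative only, the 0xcafe mark is an example):
 * metadata reserved here sits in front of the frame and can later be read
 * by a tc/BPF program through __sk_buff's data_meta. The size must be a
 * multiple of 4 and at most 32 bytes, as checked above:
 *
 *	__u32 *meta;
 *
 *	if (bpf_xdp_adjust_meta(xdp, -(int)sizeof(*meta)))
 *		return XDP_PASS;
 *	meta = (void *)(long)xdp->data_meta;
 *	if ((void *)(meta + 1) > (void *)(long)xdp->data)
 *		return XDP_PASS;
 *	meta[0] = 0xcafe;
 */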
2754 
2755 static int __bpf_tx_xdp(struct net_device *dev,
2756                         struct bpf_map *map,
2757                         struct xdp_buff *xdp,
2758                         u32 index)
2759 {
2760         int err;
2761 
2762         if (!dev->netdev_ops->ndo_xdp_xmit) {
2763                 return -EOPNOTSUPP;
2764         }
2765 
2766         err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2767         if (err)
2768                 return err;
2769         dev->netdev_ops->ndo_xdp_flush(dev);
2770         return 0;
2771 }
2772 
2773 static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd,
2774                             struct bpf_map *map,
2775                             struct xdp_buff *xdp,
2776                             u32 index)
2777 {
2778         int err;
2779 
2780         if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2781                 struct net_device *dev = fwd;
2782 
2783                 if (!dev->netdev_ops->ndo_xdp_xmit)
2784                         return -EOPNOTSUPP;
2785 
2786                 err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp);
2787                 if (err)
2788                         return err;
2789                 __dev_map_insert_ctx(map, index);
2790 
2791         } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) {
2792                 struct bpf_cpu_map_entry *rcpu = fwd;
2793 
2794                 err = cpu_map_enqueue(rcpu, xdp, dev_rx);
2795                 if (err)
2796                         return err;
2797                 __cpu_map_insert_ctx(map, index);
2798         }
2799         return 0;
2800 }
2801 
2802 void xdp_do_flush_map(void)
2803 {
2804         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2805         struct bpf_map *map = ri->map_to_flush;
2806 
2807         ri->map_to_flush = NULL;
2808         if (map) {
2809                 switch (map->map_type) {
2810                 case BPF_MAP_TYPE_DEVMAP:
2811                         __dev_map_flush(map);
2812                         break;
2813                 case BPF_MAP_TYPE_CPUMAP:
2814                         __cpu_map_flush(map);
2815                         break;
2816                 default:
2817                         break;
2818                 }
2819         }
2820 }
2821 EXPORT_SYMBOL_GPL(xdp_do_flush_map);
2822 
2823 static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index)
2824 {
2825         switch (map->map_type) {
2826         case BPF_MAP_TYPE_DEVMAP:
2827                 return __dev_map_lookup_elem(map, index);
2828         case BPF_MAP_TYPE_CPUMAP:
2829                 return __cpu_map_lookup_elem(map, index);
2830         default:
2831                 return NULL;
2832         }
2833 }
2834 
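/* The map recorded by bpf_xdp_redirect_map() is only trusted if it was set
 * by the program that is currently running: ri->map_owner holds the aux
 * pointer of that program, so a mismatch means the recorded map is stale
 * (left behind by a different or since-replaced program) and must not be
 * dereferenced.
 */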
2835 static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog,
2836                                    unsigned long aux)
2837 {
2838         return (unsigned long)xdp_prog->aux != aux;
2839 }
2840 
2841 static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp,
2842                                struct bpf_prog *xdp_prog)
2843 {
2844         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2845         unsigned long map_owner = ri->map_owner;
2846         struct bpf_map *map = ri->map;
2847         u32 index = ri->ifindex;
2848         void *fwd = NULL;
2849         int err;
2850 
2851         ri->ifindex = 0;
2852         ri->map = NULL;
2853         ri->map_owner = 0;
2854 
2855         if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2856                 err = -EFAULT;
2857                 map = NULL;
2858                 goto err;
2859         }
2860 
2861         fwd = __xdp_map_lookup_elem(map, index);
2862         if (!fwd) {
2863                 err = -EINVAL;
2864                 goto err;
2865         }
2866         if (ri->map_to_flush && ri->map_to_flush != map)
2867                 xdp_do_flush_map();
2868 
2869         err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index);
2870         if (unlikely(err))
2871                 goto err;
2872 
2873         ri->map_to_flush = map;
2874         _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
2875         return 0;
2876 err:
2877         _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
2878         return err;
2879 }
2880 
2881 int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
2882                     struct bpf_prog *xdp_prog)
2883 {
2884         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2885         struct net_device *fwd;
2886         u32 index = ri->ifindex;
2887         int err;
2888 
2889         if (ri->map)
2890                 return xdp_do_redirect_map(dev, xdp, xdp_prog);
2891 
2892         fwd = dev_get_by_index_rcu(dev_net(dev), index);
2893         ri->ifindex = 0;
2894         if (unlikely(!fwd)) {
2895                 err = -EINVAL;
2896                 goto err;
2897         }
2898 
2899         err = __bpf_tx_xdp(fwd, NULL, xdp, 0);
2900         if (unlikely(err))
2901                 goto err;
2902 
2903         _trace_xdp_redirect(dev, xdp_prog, index);
2904         return 0;
2905 err:
2906         _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2907         return err;
2908 }
2909 EXPORT_SYMBOL_GPL(xdp_do_redirect);
2910 
2911 static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
2912 {
2913         unsigned int len;
2914 
2915         if (unlikely(!(fwd->flags & IFF_UP)))
2916                 return -ENETDOWN;
2917 
2918         len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
2919         if (skb->len > len)
2920                 return -EMSGSIZE;
2921 
2922         return 0;
2923 }
2924 
2925 static int xdp_do_generic_redirect_map(struct net_device *dev,
2926                                        struct sk_buff *skb,
2927                                        struct bpf_prog *xdp_prog)
2928 {
2929         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2930         unsigned long map_owner = ri->map_owner;
2931         struct bpf_map *map = ri->map;
2932         struct net_device *fwd = NULL;
2933         u32 index = ri->ifindex;
2934         int err = 0;
2935 
2936         ri->ifindex = 0;
2937         ri->map = NULL;
2938         ri->map_owner = 0;
2939 
2940         if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) {
2941                 err = -EFAULT;
2942                 map = NULL;
2943                 goto err;
2944         }
2945         fwd = __xdp_map_lookup_elem(map, index);
2946         if (unlikely(!fwd)) {
2947                 err = -EINVAL;
2948                 goto err;
2949         }
2950 
2951         if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
2952                 if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2953                         goto err;
2954                 skb->dev = fwd;
2955         } else {
2956                 /* TODO: Handle BPF_MAP_TYPE_CPUMAP */
2957                 err = -EBADRQC;
2958                 goto err;
2959         }
2960 
2961         _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index);
2962         return 0;
2963 err:
2964         _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
2965         return err;
2966 }
2967 
2968 int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
2969                             struct bpf_prog *xdp_prog)
2970 {
2971         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
2972         u32 index = ri->ifindex;
2973         struct net_device *fwd;
2974         int err = 0;
2975 
2976         if (ri->map)
2977                 return xdp_do_generic_redirect_map(dev, skb, xdp_prog);
2978 
2979         ri->ifindex = 0;
2980         fwd = dev_get_by_index_rcu(dev_net(dev), index);
2981         if (unlikely(!fwd)) {
2982                 err = -EINVAL;
2983                 goto err;
2984         }
2985 
2986         if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
2987                 goto err;
2988 
2989         skb->dev = fwd;
2990         _trace_xdp_redirect(dev, xdp_prog, index);
2991         return 0;
2992 err:
2993         _trace_xdp_redirect_err(dev, xdp_prog, index, err);
2994         return err;
2995 }
2996 EXPORT_SYMBOL_GPL(xdp_do_generic_redirect);
2997 
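/* The redirect helpers below only stash the target (an ifindex, or a
 * map plus index) in the per-CPU redirect_info and return XDP_REDIRECT;
 * the actual transmit happens later when the driver acts on that
 * verdict and calls xdp_do_redirect().
 */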
2998 BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
2999 {
3000         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3001 
3002         if (unlikely(flags))
3003                 return XDP_ABORTED;
3004 
3005         ri->ifindex = ifindex;
3006         ri->flags = flags;
3007         ri->map = NULL;
3008         ri->map_owner = 0;
3009 
3010         return XDP_REDIRECT;
3011 }
3012 
3013 static const struct bpf_func_proto bpf_xdp_redirect_proto = {
3014         .func           = bpf_xdp_redirect,
3015         .gpl_only       = false,
3016         .ret_type       = RET_INTEGER,
3017         .arg1_type      = ARG_ANYTHING,
3018         .arg2_type      = ARG_ANYTHING,
3019 };
3020 
3021 BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags,
3022            unsigned long, map_owner)
3023 {
3024         struct redirect_info *ri = this_cpu_ptr(&redirect_info);
3025 
3026         if (unlikely(flags))
3027                 return XDP_ABORTED;
3028 
3029         ri->ifindex = ifindex;
3030         ri->flags = flags;
3031         ri->map = map;
3032         ri->map_owner = map_owner;
3033 
3034         return XDP_REDIRECT;
3035 }
3036 
3037 /* Note, arg4 is hidden from users and populated by the verifier
3038  * with the right pointer.
3039  */
3040 static const struct bpf_func_proto bpf_xdp_redirect_map_proto = {
3041         .func           = bpf_xdp_redirect_map,
3042         .gpl_only       = false,
3043         .ret_type       = RET_INTEGER,
3044         .arg1_type      = ARG_CONST_MAP_PTR,
3045         .arg2_type      = ARG_ANYTHING,
3046         .arg3_type      = ARG_ANYTHING,
3047 };
3048 
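/* Tell the verifier which helpers may move or rewrite packet data.
 * After a call to any of these, previously derived packet pointers are
 * invalidated and must be re-checked against data_end.
 */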
3049 bool bpf_helper_changes_pkt_data(void *func)
3050 {
3051         if (func == bpf_skb_vlan_push ||
3052             func == bpf_skb_vlan_pop ||
3053             func == bpf_skb_store_bytes ||
3054             func == bpf_skb_change_proto ||
3055             func == bpf_skb_change_head ||
3056             func == bpf_skb_change_tail ||
3057             func == bpf_skb_adjust_room ||
3058             func == bpf_skb_pull_data ||
3059             func == bpf_clone_redirect ||
3060             func == bpf_l3_csum_replace ||
3061             func == bpf_l4_csum_replace ||
3062             func == bpf_xdp_adjust_head ||
3063             func == bpf_xdp_adjust_meta ||
3064             func == bpf_msg_pull_data)
3065                 return true;
3066 
3067         return false;
3068 }
3069 
3070 static unsigned long bpf_skb_copy(void *dst_buff, const void *skb,
3071                                   unsigned long off, unsigned long len)
3072 {
3073         void *ptr = skb_header_pointer(skb, off, len, dst_buff);
3074 
3075         if (unlikely(!ptr))
3076                 return len;
3077         if (ptr != dst_buff)
3078                 memcpy(dst_buff, ptr, len);
3079 
3080         return 0;
3081 }
3082 
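/* bpf_skb_event_output() emits caller-provided metadata, optionally
 * followed by up to skb_size bytes of packet payload (encoded in the
 * upper 32 bits of @flags), into a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
 */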
3083 BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map,
3084            u64, flags, void *, meta, u64, meta_size)
3085 {
3086         u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
3087 
3088         if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3089                 return -EINVAL;
3090         if (unlikely(skb_size > skb->len))
3091                 return -EFAULT;
3092 
3093         return bpf_event_output(map, flags, meta, meta_size, skb, skb_size,
3094                                 bpf_skb_copy);
3095 }
3096 
3097 static const struct bpf_func_proto bpf_skb_event_output_proto = {
3098         .func           = bpf_skb_event_output,
3099         .gpl_only       = true,
3100         .ret_type       = RET_INTEGER,
3101         .arg1_type      = ARG_PTR_TO_CTX,
3102         .arg2_type      = ARG_CONST_MAP_PTR,
3103         .arg3_type      = ARG_ANYTHING,
3104         .arg4_type      = ARG_PTR_TO_MEM,
3105         .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
3106 };
3107 
3108 static unsigned short bpf_tunnel_key_af(u64 flags)
3109 {
3110         return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET;
3111 }
3112 
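/* Copy the received tunnel metadata into the caller's bpf_tunnel_key.
 * Older, shorter layouts of the struct are still accepted; they are
 * routed through the on-stack compat buffer so the fill code below
 * only has to deal with the current layout.
 */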
3113 BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to,
3114            u32, size, u64, flags)
3115 {
3116         const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3117         u8 compat[sizeof(struct bpf_tunnel_key)];
3118         void *to_orig = to;
3119         int err;
3120 
3121         if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
3122                 err = -EINVAL;
3123                 goto err_clear;
3124         }
3125         if (ip_tunnel_info_af(info) != bpf_tunnel_key_af(flags)) {
3126                 err = -EPROTO;
3127                 goto err_clear;
3128         }
3129         if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3130                 err = -EINVAL;
3131                 switch (size) {
3132                 case offsetof(struct bpf_tunnel_key, tunnel_label):
3133                 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3134                         goto set_compat;
3135                 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3136                         /* Fix up deprecated structure layouts here, so we have
3137                          * a common path later on.
3138                          */
3139                         if (ip_tunnel_info_af(info) != AF_INET)
3140                                 goto err_clear;
3141 set_compat:
3142                         to = (struct bpf_tunnel_key *)compat;
3143                         break;
3144                 default:
3145                         goto err_clear;
3146                 }
3147         }
3148 
3149         to->tunnel_id = be64_to_cpu(info->key.tun_id);
3150         to->tunnel_tos = info->key.tos;
3151         to->tunnel_ttl = info->key.ttl;
3152 
3153         if (flags & BPF_F_TUNINFO_IPV6) {
3154                 memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
3155                        sizeof(to->remote_ipv6));
3156                 to->tunnel_label = be32_to_cpu(info->key.label);
3157         } else {
3158                 to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
3159         }
3160 
3161         if (unlikely(size != sizeof(struct bpf_tunnel_key)))
3162                 memcpy(to_orig, to, size);
3163 
3164         return 0;
3165 err_clear:
3166         memset(to_orig, 0, size);
3167         return err;
3168 }
3169 
3170 static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = {
3171         .func           = bpf_skb_get_tunnel_key,
3172         .gpl_only       = false,
3173         .ret_type       = RET_INTEGER,
3174         .arg1_type      = ARG_PTR_TO_CTX,
3175         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
3176         .arg3_type      = ARG_CONST_SIZE,
3177         .arg4_type      = ARG_ANYTHING,
3178 };
3179 
3180 BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size)
3181 {
3182         const struct ip_tunnel_info *info = skb_tunnel_info(skb);
3183         int err;
3184 
3185         if (unlikely(!info ||
3186                      !(info->key.tun_flags & TUNNEL_OPTIONS_PRESENT))) {
3187                 err = -ENOENT;
3188                 goto err_clear;
3189         }
3190         if (unlikely(size < info->options_len)) {
3191                 err = -ENOMEM;
3192                 goto err_clear;
3193         }
3194 
3195         ip_tunnel_info_opts_get(to, info);
3196         if (size > info->options_len)
3197                 memset(to + info->options_len, 0, size - info->options_len);
3198 
3199         return info->options_len;
3200 err_clear:
3201         memset(to, 0, size);
3202         return err;
3203 }
3204 
3205 static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = {
3206         .func           = bpf_skb_get_tunnel_opt,
3207         .gpl_only       = false,
3208         .ret_type       = RET_INTEGER,
3209         .arg1_type      = ARG_PTR_TO_CTX,
3210         .arg2_type      = ARG_PTR_TO_UNINIT_MEM,
3211         .arg3_type      = ARG_CONST_SIZE,
3212 };
3213 
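/* Per-CPU metadata dst used by bpf_skb_set_tunnel_key() to attach
 * transmit tunnel parameters to an skb. It is allocated lazily the
 * first time a program requests one of the set_tunnel helpers, see
 * bpf_get_skb_set_tunnel_proto() below.
 */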
3214 static struct metadata_dst __percpu *md_dst;
3215 
3216 BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
3217            const struct bpf_tunnel_key *, from, u32, size, u64, flags)
3218 {
3219         struct metadata_dst *md = this_cpu_ptr(md_dst);
3220         u8 compat[sizeof(struct bpf_tunnel_key)];
3221         struct ip_tunnel_info *info;
3222 
3223         if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
3224                                BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
3225                 return -EINVAL;
3226         if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
3227                 switch (size) {
3228                 case offsetof(struct bpf_tunnel_key, tunnel_label):
3229                 case offsetof(struct bpf_tunnel_key, tunnel_ext):
3230                 case offsetof(struct bpf_tunnel_key, remote_ipv6[1]):
3231                         /* Fix up deprecated structure layouts here, so we have
3232                          * a common path later on.
3233                          */
3234                         memcpy(compat, from, size);
3235                         memset(compat + size, 0, sizeof(compat) - size);
3236                         from = (const struct bpf_tunnel_key *) compat;
3237                         break;
3238                 default:
3239                         return -EINVAL;
3240                 }
3241         }
3242         if (unlikely((!(flags & BPF_F_TUNINFO_IPV6) && from->tunnel_label) ||
3243                      from->tunnel_ext))
3244                 return -EINVAL;
3245 
3246         skb_dst_drop(skb);
3247         dst_hold((struct dst_entry *) md);
3248         skb_dst_set(skb, (struct dst_entry *) md);
3249 
3250         info = &md->u.tun_info;
3251         memset(info, 0, sizeof(*info));
3252         info->mode = IP_TUNNEL_INFO_TX;
3253 
3254         info->key.tun_flags = TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_NOCACHE;
3255         if (flags & BPF_F_DONT_FRAGMENT)
3256                 info->key.tun_flags |= TUNNEL_DONT_FRAGMENT;
3257         if (flags & BPF_F_ZERO_CSUM_TX)
3258                 info->key.tun_flags &= ~TUNNEL_CSUM;
3259         if (flags & BPF_F_SEQ_NUMBER)
3260                 info->key.tun_flags |= TUNNEL_SEQ;
3261 
3262         info->key.tun_id = cpu_to_be64(from->tunnel_id);
3263         info->key.tos = from->tunnel_tos;
3264         info->key.ttl = from->tunnel_ttl;
3265 
3266         if (flags & BPF_F_TUNINFO_IPV6) {
3267                 info->mode |= IP_TUNNEL_INFO_IPV6;
3268                 memcpy(&info->key.u.ipv6.dst, from->remote_ipv6,
3269                        sizeof(from->remote_ipv6));
3270                 info->key.label = cpu_to_be32(from->tunnel_label) &
3271                                   IPV6_FLOWLABEL_MASK;
3272         } else {
3273                 info->key.u.ipv4.dst = cpu_to_be32(from->remote_ipv4);
3274         }
3275 
3276         return 0;
3277 }
3278 
3279 static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = {
3280         .func           = bpf_skb_set_tunnel_key,
3281         .gpl_only       = false,
3282         .ret_type       = RET_INTEGER,
3283         .arg1_type      = ARG_PTR_TO_CTX,
3284         .arg2_type      = ARG_PTR_TO_MEM,
3285         .arg3_type      = ARG_CONST_SIZE,
3286         .arg4_type      = ARG_ANYTHING,
3287 };
3288 
3289 BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb,
3290            const u8 *, from, u32, size)
3291 {
3292         struct ip_tunnel_info *info = skb_tunnel_info(skb);
3293         const struct metadata_dst *md = this_cpu_ptr(md_dst);
3294 
3295         if (unlikely(info != &md->u.tun_info || (size & (sizeof(u32) - 1))))
3296                 return -EINVAL;
3297         if (unlikely(size > IP_TUNNEL_OPTS_MAX))
3298                 return -ENOMEM;
3299 
3300         ip_tunnel_info_opts_set(info, from, size);
3301 
3302         return 0;
3303 }
3304 
3305 static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
3306         .func           = bpf_skb_set_tunnel_opt,
3307         .gpl_only       = false,
3308         .ret_type       = RET_INTEGER,
3309         .arg1_type      = ARG_PTR_TO_CTX,
3310         .arg2_type      = ARG_PTR_TO_MEM,
3311         .arg3_type      = ARG_CONST_SIZE,
3312 };
3313 
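/* Hand out the set_tunnel_key/opt prototypes and allocate the per-CPU
 * md_dst on first use. The cmpxchg() resolves the race when two
 * programs requesting the helper are loaded concurrently: the loser
 * frees its allocation and reuses the winner's.
 */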
3314 static const struct bpf_func_proto *
3315 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
3316 {
3317         if (!md_dst) {
3318                 struct metadata_dst __percpu *tmp;
3319 
3320                 tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX,
3321                                                 METADATA_IP_TUNNEL,
3322                                                 GFP_KERNEL);
3323                 if (!tmp)
3324                         return NULL;
3325                 if (cmpxchg(&md_dst, NULL, tmp))
3326                         metadata_dst_free_percpu(tmp);
3327         }
3328 
3329         switch (which) {
3330         case BPF_FUNC_skb_set_tunnel_key:
3331                 return &bpf_skb_set_tunnel_key_proto;
3332         case BPF_FUNC_skb_set_tunnel_opt:
3333                 return &bpf_skb_set_tunnel_opt_proto;
3334         default:
3335                 return NULL;
3336         }
3337 }
3338 
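/* Return whether the skb's (full) socket belongs to the cgroup stored
 * at @idx in a BPF_MAP_TYPE_CGROUP_ARRAY map: 1 if it is a descendant
 * of that cgroup, 0 if not, or a negative error.
 */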
3339 BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map,
3340            u32, idx)
3341 {
3342         struct bpf_array *array = container_of(map, struct bpf_array, map);
3343         struct cgroup *cgrp;
3344         struct sock *sk;
3345 
3346         sk = skb_to_full_sk(skb);
3347         if (!sk || !sk_fullsock(sk))
3348                 return -ENOENT;
3349         if (unlikely(idx >= array->map.max_entries))
3350                 return -E2BIG;
3351 
3352         cgrp = READ_ONCE(array->ptrs[idx]);
3353         if (unlikely(!cgrp))
3354                 return -EAGAIN;
3355 
3356         return sk_under_cgroup_hierarchy(sk, cgrp);
3357 }
3358 
3359 static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
3360         .func           = bpf_skb_under_cgroup,
3361         .gpl_only       = false,
3362         .ret_type       = RET_INTEGER,
3363         .arg1_type      = ARG_PTR_TO_CTX,
3364         .arg2_type      = ARG_CONST_MAP_PTR,
3365         .arg3_type      = ARG_ANYTHING,
3366 };
3367 
3368 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
3369                                   unsigned long off, unsigned long len)
3370 {
3371         memcpy(dst_buff, src_buff + off, len);
3372         return 0;
3373 }
3374 
3375 BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map,
3376            u64, flags, void *, meta, u64, meta_size)
3377 {
3378         u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32;
3379 
3380         if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK)))
3381                 return -EINVAL;
3382         if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data)))
3383                 return -EFAULT;
3384 
3385         return bpf_event_output(map, flags, meta, meta_size, xdp->data,
3386                                 xdp_size, bpf_xdp_copy);
3387 }
3388 
3389 static const struct bpf_func_proto bpf_xdp_event_output_proto = {
3390         .func           = bpf_xdp_event_output,
3391         .gpl_only       = true,
3392         .ret_type       = RET_INTEGER,
3393         .arg1_type      = ARG_PTR_TO_CTX,
3394         .arg2_type      = ARG_CONST_MAP_PTR,
3395         .arg3_type      = ARG_ANYTHING,
3396         .arg4_type      = ARG_PTR_TO_MEM,
3397         .arg5_type      = ARG_CONST_SIZE_OR_ZERO,
3398 };
3399 
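/* bpf_get_socket_cookie() returns a unique identifier for the skb's
 * socket (0 if there is none); bpf_get_socket_uid() below returns the
 * socket's owning uid, or overflowuid when no full socket is attached.
 */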
3400 BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb)
3401 {
3402         return skb->sk ? sock_gen_cookie(skb->sk) : 0;
3403 }
3404 
3405 static const struct bpf_func_proto bpf_get_socket_cookie_proto = {
3406         .func           = bpf_get_socket_cookie,
3407         .gpl_only       = false,
3408         .ret_type       = RET_INTEGER,
3409         .arg1_type      = ARG_PTR_TO_CTX,
3410 };
3411 
3412 BPF_CALL_1(bpf_get_socket_uid, struct sk_buff *, skb)
3413 {
3414         struct sock *sk = sk_to_full_sk(skb->sk);
3415         kuid_t kuid;
3416 
3417         if (!sk || !sk_fullsock(sk))
3418                 return overflowuid;
3419         kuid = sock_net_uid(sock_net(sk), sk);
3420         return from_kuid_munged(sock_net(sk)->user_ns, kuid);
3421 }
3422 
3423 static const struct bpf_func_proto bpf_get_socket_uid_proto = {
3424         .func           = bpf_get_socket_uid,
3425         .gpl_only       = false,
3426         .ret_type       = RET_INTEGER,
3427         .arg1_type      = ARG_PTR_TO_CTX,
3428 };
3429 
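/* In-kernel setsockopt() for sock_ops programs. Only a small,
 * explicitly listed subset of SOL_SOCKET, SOL_IP, SOL_IPV6 and SOL_TCP
 * options is handled; everything else returns -EINVAL. Values are
 * applied to the socket directly rather than via the normal
 * setsockopt path.
 */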
3430 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3431            int, level, int, optname, char *, optval, int, optlen)
3432 {
3433         struct sock *sk = bpf_sock->sk;
3434         int ret = 0;
3435         int val;
3436 
3437         if (!sk_fullsock(sk))
3438                 return -EINVAL;
3439 
3440         if (level == SOL_SOCKET) {
3441                 if (optlen != sizeof(int))
3442                         return -EINVAL;
3443                 val = *((int *)optval);
3444 
3445                 /* Only some socket options are supported */
3446                 switch (optname) {
3447                 case SO_RCVBUF:
3448                         sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
3449                         sk->sk_rcvbuf = max_t(int, val * 2, SOCK_MIN_RCVBUF);
3450                         break;
3451                 case SO_SNDBUF:
3452                         sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
3453                         sk->sk_sndbuf = max_t(int, val * 2, SOCK_MIN_SNDBUF);
3454                         break;
3455                 case SO_MAX_PACING_RATE:
3456                         sk->sk_max_pacing_rate = val;
3457                         sk->sk_pacing_rate = min(sk->sk_pacing_rate,
3458                                                  sk->sk_max_pacing_rate);
3459                         break;
3460                 case SO_PRIORITY:
3461                         sk->sk_priority = val;
3462                         break;
3463                 case SO_RCVLOWAT:
3464                         if (val < 0)
3465                                 val = INT_MAX;
3466                         sk->sk_rcvlowat = val ? : 1;
3467                         break;
3468                 case SO_MARK:
3469                         sk->sk_mark = val;
3470                         break;
3471                 default:
3472                         ret = -EINVAL;
3473                 }
3474 #ifdef CONFIG_INET
3475         } else if (level == SOL_IP) {
3476                 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
3477                         return -EINVAL;
3478 
3479                 val = *((int *)optval);
3480                 /* Only some options are supported */
3481                 switch (optname) {
3482                 case IP_TOS:
3483                         if (val < -1 || val > 0xff) {
3484                                 ret = -EINVAL;
3485                         } else {
3486                                 struct inet_sock *inet = inet_sk(sk);
3487 
3488                                 if (val == -1)
3489                                         val = 0;
3490                                 inet->tos = val;
3491                         }
3492                         break;
3493                 default:
3494                         ret = -EINVAL;
3495                 }
3496 #if IS_ENABLED(CONFIG_IPV6)
3497         } else if (level == SOL_IPV6) {
3498                 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
3499                         return -EINVAL;
3500 
3501                 val = *((int *)optval);
3502                 /* Only some options are supported */
3503                 switch (optname) {
3504                 case IPV6_TCLASS:
3505                         if (val < -1 || val > 0xff) {
3506                                 ret = -EINVAL;
3507                         } else {
3508                                 struct ipv6_pinfo *np = inet6_sk(sk);
3509 
3510                                 if (val == -1)
3511                                         val = 0;
3512                                 np->tclass = val;
3513                         }
3514                         break;
3515                 default:
3516                         ret = -EINVAL;
3517                 }
3518 #endif
3519         } else if (level == SOL_TCP &&
3520                    sk->sk_prot->setsockopt == tcp_setsockopt) {
3521                 if (optname == TCP_CONGESTION) {
3522                         char name[TCP_CA_NAME_MAX];
3523                         bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
3524 
3525                         strncpy(name, optval, min_t(long, optlen,
3526                                                     TCP_CA_NAME_MAX-1));
3527                         name[TCP_CA_NAME_MAX-1] = 0;
3528                         ret = tcp_set_congestion_control(sk, name, false,
3529                                                          reinit);
3530                 } else {
3531                         struct tcp_sock *tp = tcp_sk(sk);
3532 
3533                         if (optlen != sizeof(int))
3534                                 return -EINVAL;
3535 
3536                         val = *((int *)optval);
3537                         /* Only some options are supported */
3538                         switch (optname) {
3539                         case TCP_BPF_IW:
3540                                 if (val <= 0 || tp->data_segs_out > 0)
3541                                         ret = -EINVAL;
3542                                 else
3543                                         tp->snd_cwnd = val;
3544                                 break;
3545                         case TCP_BPF_SNDCWND_CLAMP:
3546                                 if (val <= 0) {
3547                                         ret = -EINVAL;
3548                                 } else {
3549                                         tp->snd_cwnd_clamp = val;
3550                                         tp->snd_ssthresh = val;
3551                                 }
3552                                 break;
3553                         default:
3554                                 ret = -EINVAL;
3555                         }
3556                 }
3557 #endif
3558         } else {
3559                 ret = -EINVAL;
3560         }
3561         return ret;
3562 }
3563 
3564 static const struct bpf_func_proto bpf_setsockopt_proto = {
3565         .func           = bpf_setsockopt,
3566         .gpl_only       = false,
3567         .ret_type       = RET_INTEGER,
3568         .arg1_type      = ARG_PTR_TO_CTX,
3569         .arg2_type      = ARG_ANYTHING,
3570         .arg3_type      = ARG_ANYTHING,
3571         .arg4_type      = ARG_PTR_TO_MEM,
3572         .arg5_type      = ARG_CONST_SIZE,
3573 };
3574 
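/* getsockopt() counterpart for sock_ops programs. Only TCP_CONGESTION,
 * IP_TOS and IPV6_TCLASS can currently be read; on any failure the
 * output buffer is zeroed and -EINVAL is returned.
 */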
3575 BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
3576            int, level, int, optname, char *, optval, int, optlen)
3577 {
3578         struct sock *sk = bpf_sock->sk;
3579 
3580         if (!sk_fullsock(sk))
3581                 goto err_clear;
3582 
3583 #ifdef CONFIG_INET
3584         if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
3585                 if (optname == TCP_CONGESTION) {
3586                         struct inet_connection_sock *icsk = inet_csk(sk);
3587 
3588                         if (!icsk->icsk_ca_ops || optlen <= 1)
3589                                 goto err_clear;
3590                         strncpy(optval, icsk->icsk_ca_ops->name, optlen);
3591                         optval[optlen - 1] = 0;
3592                 } else {
3593                         goto err_clear;
3594                 }
3595         } else if (level == SOL_IP) {
3596                 struct inet_sock *inet = inet_sk(sk);
3597 
3598                 if (optlen != sizeof(int) || sk->sk_family != AF_INET)
3599                         goto err_clear;
3600 
3601                 /* Only some options are supported */
3602                 switch (optname) {
3603                 case IP_TOS:
3604                         *((int *)optval) = (int)inet->tos;
3605                         break;
3606                 default:
3607                         goto err_clear;
3608                 }
3609 #if IS_ENABLED(CONFIG_IPV6)
3610         } else if (level == SOL_IPV6) {
3611                 struct ipv6_pinfo *np = inet6_sk(sk);
3612 
3613                 if (optlen != sizeof(int) || sk->sk_family != AF_INET6)
3614                         goto err_clear;
3615 
3616                 /* Only some options are supported */
3617                 switch (optname) {
3618                 case IPV6_TCLASS:
3619                         *((int *)optval) = (int)np->tclass;
3620                         break;
3621                 default:
3622                         goto err_clear;
3623                 }
3624 #endif
3625         } else {
3626                 goto err_clear;
3627         }
3628         return 0;
3629 #endif
3630 err_clear:
3631         memset(optval, 0, optlen);
3632         return -EINVAL;
3633 }
3634 
3635 static const struct bpf_func_proto bpf_getsockopt_proto = {
3636         .func           = bpf_getsockopt,
3637         .gpl_only       = false,
3638         .ret_type       = RET_INTEGER,
3639         .arg1_type      = ARG_PTR_TO_CTX,
3640         .arg2_type      = ARG_ANYTHING,
3641         .arg3_type      = ARG_ANYTHING,
3642         .arg4_type      = ARG_PTR_TO_UNINIT_MEM,
3643         .arg5_type      = ARG_CONST_SIZE,
3644 };
3645 
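/* Select which optional sock_ops callbacks the program wants to see by
 * updating tp->bpf_sock_ops_cb_flags. Bits that are not part of
 * BPF_SOCK_OPS_ALL_CB_FLAGS are ignored and handed back to the caller
 * so it can detect flags this kernel does not support.
 */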
3646 BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock,
3647            int, argval)
3648 {
3649         struct sock *sk = bpf_sock->sk;
3650         int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS;
3651 
3652         if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk))
3653                 return -EINVAL;
3654 
3655         if (val)
3656                 tcp_sk(sk)->bpf_sock_ops_cb_flags = val;
3657 
3658         return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS);
3659 }
3660 
3661 static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = {
3662         .func           = bpf_sock_ops_cb_flags_set,
3663         .gpl_only       = false,
3664         .ret_type       = RET_INTEGER,
3665         .arg1_type      = ARG_PTR_TO_CTX,
3666         .arg2_type      = ARG_ANYTHING,
3667 };
3668 
3669 const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly;
3670 EXPORT_SYMBOL_GPL(ipv6_bpf_stub);
3671 
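/* Helper backing BPF_FUNC_bind. It is only exposed to cgroup
 * inet4/inet6 connect programs (see sock_addr_func_proto()) and lets
 * them bind the socket to a local address before the connect proceeds.
 */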
3672 BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
3673            int, addr_len)
3674 {
3675 #ifdef CONFIG_INET
3676         struct sock *sk = ctx->sk;
3677         int err;
3678 
3679         /* Binding to a port can be expensive, so it's prohibited in the
3680          * helper. Only binding to an IP address is supported.
3681          */
3682         err = -EINVAL;
3683         if (addr->sa_family == AF_INET) {
3684                 if (addr_len < sizeof(struct sockaddr_in))
3685                         return err;
3686                 if (((struct sockaddr_in *)addr)->sin_port != htons(0))
3687                         return err;
3688                 return __inet_bind(sk, addr, addr_len, true, false);
3689 #if IS_ENABLED(CONFIG_IPV6)
3690         } else if (addr->sa_family == AF_INET6) {
3691                 if (addr_len < SIN6_LEN_RFC2133)
3692                         return err;
3693                 if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
3694                         return err;
3695                 /* ipv6_bpf_stub cannot be NULL, since we are called from the
3696                  * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
3697                  */
3698                 return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
3699 #endif /* CONFIG_IPV6 */
3700         }
3701 #endif /* CONFIG_INET */
3702 
3703         return -EAFNOSUPPORT;
3704 }
3705 
3706 static const struct bpf_func_proto bpf_bind_proto = {
3707         .func           = bpf_bind,
3708         .gpl_only       = false,
3709         .ret_type       = RET_INTEGER,
3710         .arg1_type      = ARG_PTR_TO_CTX,
3711         .arg2_type      = ARG_PTR_TO_MEM,
3712         .arg3_type      = ARG_CONST_SIZE,
3713 };
3714 
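/* Each program type resolves helper calls through one of the
 * *_func_proto() callbacks below, mapping a BPF_FUNC_* id to the
 * corresponding bpf_func_proto (or NULL if the helper is not allowed
 * for that type). bpf_base_func_proto() covers the helpers common to
 * all of them and serves as the fallback.
 */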
3715 static const struct bpf_func_proto *
3716 bpf_base_func_proto(enum bpf_func_id func_id)
3717 {
3718         switch (func_id) {
3719         case BPF_FUNC_map_lookup_elem:
3720                 return &bpf_map_lookup_elem_proto;
3721         case BPF_FUNC_map_update_elem:
3722                 return &bpf_map_update_elem_proto;
3723         case BPF_FUNC_map_delete_elem:
3724                 return &bpf_map_delete_elem_proto;
3725         case BPF_FUNC_get_prandom_u32:
3726                 return &bpf_get_prandom_u32_proto;
3727         case BPF_FUNC_get_smp_processor_id:
3728                 return &bpf_get_raw_smp_processor_id_proto;
3729         case BPF_FUNC_get_numa_node_id:
3730                 return &bpf_get_numa_node_id_proto;
3731         case BPF_FUNC_tail_call:
3732                 return &bpf_tail_call_proto;
3733         case BPF_FUNC_ktime_get_ns:
3734                 return &bpf_ktime_get_ns_proto;
3735         case BPF_FUNC_trace_printk:
3736                 if (capable(CAP_SYS_ADMIN))
3737                         return bpf_get_trace_printk_proto();
3738         default:
3739                 return NULL;
3740         }
3741 }
3742 
3743 static const struct bpf_func_proto *
3744 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3745 {
3746         switch (func_id) {
3747         /* inet and inet6 sockets are created in a process
3748          * context, so there is always a valid uid/gid
3749          */
3750         case BPF_FUNC_get_current_uid_gid:
3751                 return &bpf_get_current_uid_gid_proto;
3752         default:
3753                 return bpf_base_func_proto(func_id);
3754         }
3755 }
3756 
3757 static const struct bpf_func_proto *
3758 sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3759 {
3760         switch (func_id) {
3761         /* inet and inet6 sockets are created in a process
3762          * context, so there is always a valid uid/gid
3763          */
3764         case BPF_FUNC_get_current_uid_gid:
3765                 return &bpf_get_current_uid_gid_proto;
3766         case BPF_FUNC_bind:
3767                 switch (prog->expected_attach_type) {
3768                 case BPF_CGROUP_INET4_CONNECT:
3769                 case BPF_CGROUP_INET6_CONNECT:
3770                         return &bpf_bind_proto;
3771                 default:
3772                         return NULL;
3773                 }
3774         default:
3775                 return bpf_base_func_proto(func_id);
3776         }
3777 }
3778 
3779 static const struct bpf_func_proto *
3780 sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3781 {
3782         switch (func_id) {
3783         case BPF_FUNC_skb_load_bytes:
3784                 return &bpf_skb_load_bytes_proto;
3785         case BPF_FUNC_get_socket_cookie:
3786                 return &bpf_get_socket_cookie_proto;
3787         case BPF_FUNC_get_socket_uid:
3788                 return &bpf_get_socket_uid_proto;
3789         default:
3790                 return bpf_base_func_proto(func_id);
3791         }
3792 }
3793 
3794 static const struct bpf_func_proto *
3795 tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3796 {
3797         switch (func_id) {
3798         case BPF_FUNC_skb_store_bytes:
3799                 return &bpf_skb_store_bytes_proto;
3800         case BPF_FUNC_skb_load_bytes:
3801                 return &bpf_skb_load_bytes_proto;
3802         case BPF_FUNC_skb_pull_data:
3803                 return &bpf_skb_pull_data_proto;
3804         case BPF_FUNC_csum_diff:
3805                 return &bpf_csum_diff_proto;
3806         case BPF_FUNC_csum_update:
3807                 return &bpf_csum_update_proto;
3808         case BPF_FUNC_l3_csum_replace:
3809                 return &bpf_l3_csum_replace_proto;
3810         case BPF_FUNC_l4_csum_replace:
3811                 return &bpf_l4_csum_replace_proto;
3812         case BPF_FUNC_clone_redirect:
3813                 return &bpf_clone_redirect_proto;
3814         case BPF_FUNC_get_cgroup_classid:
3815                 return &bpf_get_cgroup_classid_proto;
3816         case BPF_FUNC_skb_vlan_push:
3817                 return &bpf_skb_vlan_push_proto;
3818         case BPF_FUNC_skb_vlan_pop:
3819                 return &bpf_skb_vlan_pop_proto;
3820         case BPF_FUNC_skb_change_proto:
3821                 return &bpf_skb_change_proto_proto;
3822         case BPF_FUNC_skb_change_type:
3823                 return &bpf_skb_change_type_proto;
3824         case BPF_FUNC_skb_adjust_room:
3825                 return &bpf_skb_adjust_room_proto;
3826         case BPF_FUNC_skb_change_tail:
3827                 return &bpf_skb_change_tail_proto;
3828         case BPF_FUNC_skb_get_tunnel_key:
3829                 return &bpf_skb_get_tunnel_key_proto;
3830         case BPF_FUNC_skb_set_tunnel_key:
3831                 return bpf_get_skb_set_tunnel_proto(func_id);
3832         case BPF_FUNC_skb_get_tunnel_opt:
3833                 return &bpf_skb_get_tunnel_opt_proto;
3834         case BPF_FUNC_skb_set_tunnel_opt:
3835                 return bpf_get_skb_set_tunnel_proto(func_id);
3836         case BPF_FUNC_redirect:
3837                 return &bpf_redirect_proto;
3838         case BPF_FUNC_get_route_realm:
3839                 return &bpf_get_route_realm_proto;
3840         case BPF_FUNC_get_hash_recalc:
3841                 return &bpf_get_hash_recalc_proto;
3842         case BPF_FUNC_set_hash_invalid:
3843                 return &bpf_set_hash_invalid_proto;
3844         case BPF_FUNC_set_hash:
3845                 return &bpf_set_hash_proto;
3846         case BPF_FUNC_perf_event_output:
3847                 return &bpf_skb_event_output_proto;
3848         case BPF_FUNC_get_smp_processor_id:
3849                 return &bpf_get_smp_processor_id_proto;
3850         case BPF_FUNC_skb_under_cgroup:
3851                 return &bpf_skb_under_cgroup_proto;
3852         case BPF_FUNC_get_socket_cookie:
3853                 return &bpf_get_socket_cookie_proto;
3854         case BPF_FUNC_get_socket_uid:
3855                 return &bpf_get_socket_uid_proto;
3856         default:
3857                 return bpf_base_func_proto(func_id);
3858         }
3859 }
3860 
3861 static const struct bpf_func_proto *
3862 xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3863 {
3864         switch (func_id) {
3865         case BPF_FUNC_perf_event_output:
3866                 return &bpf_xdp_event_output_proto;
3867         case BPF_FUNC_get_smp_processor_id:
3868                 return &bpf_get_smp_processor_id_proto;
3869         case BPF_FUNC_csum_diff:
3870                 return &bpf_csum_diff_proto;
3871         case BPF_FUNC_xdp_adjust_head:
3872                 return &bpf_xdp_adjust_head_proto;
3873         case BPF_FUNC_xdp_adjust_meta:
3874                 return &bpf_xdp_adjust_meta_proto;
3875         case BPF_FUNC_redirect:
3876                 return &bpf_xdp_redirect_proto;
3877         case BPF_FUNC_redirect_map:
3878                 return &bpf_xdp_redirect_map_proto;
3879         default:
3880                 return bpf_base_func_proto(func_id);
3881         }
3882 }
3883 
3884 static const struct bpf_func_proto *
3885 lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3886 {
3887         switch (func_id) {
3888         case BPF_FUNC_skb_load_bytes:
3889                 return &bpf_skb_load_bytes_proto;
3890         case BPF_FUNC_skb_pull_data:
3891                 return &bpf_skb_pull_data_proto;
3892         case BPF_FUNC_csum_diff:
3893                 return &bpf_csum_diff_proto;
3894         case BPF_FUNC_get_cgroup_classid:
3895                 return &bpf_get_cgroup_classid_proto;
3896         case BPF_FUNC_get_route_realm:
3897                 return &bpf_get_route_realm_proto;
3898         case BPF_FUNC_get_hash_recalc:
3899                 return &bpf_get_hash_recalc_proto;
3900         case BPF_FUNC_perf_event_output:
3901                 return &bpf_skb_event_output_proto;
3902         case BPF_FUNC_get_smp_processor_id:
3903                 return &bpf_get_smp_processor_id_proto;
3904         case BPF_FUNC_skb_under_cgroup:
3905                 return &bpf_skb_under_cgroup_proto;
3906         default:
3907                 return bpf_base_func_proto(func_id);
3908         }
3909 }
3910 
3911 static const struct bpf_func_proto *
3912 sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3913 {
3914         switch (func_id) {
3915         case BPF_FUNC_setsockopt:
3916                 return &bpf_setsockopt_proto;
3917         case BPF_FUNC_getsockopt:
3918                 return &bpf_getsockopt_proto;
3919         case BPF_FUNC_sock_ops_cb_flags_set:
3920                 return &bpf_sock_ops_cb_flags_set_proto;
3921         case BPF_FUNC_sock_map_update:
3922                 return &bpf_sock_map_update_proto;
3923         default:
3924                 return bpf_base_func_proto(func_id);
3925         }
3926 }
3927 
3928 static const struct bpf_func_proto *
3929 sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3930 {
3931         switch (func_id) {
3932         case BPF_FUNC_msg_redirect_map:
3933                 return &bpf_msg_redirect_map_proto;
3934         case BPF_FUNC_msg_apply_bytes:
3935                 return &bpf_msg_apply_bytes_proto;
3936         case BPF_FUNC_msg_cork_bytes:
3937                 return &bpf_msg_cork_bytes_proto;
3938         case BPF_FUNC_msg_pull_data:
3939                 return &bpf_msg_pull_data_proto;
3940         default:
3941                 return bpf_base_func_proto(func_id);
3942         }
3943 }
3944 
3945 static const struct bpf_func_proto *
3946 sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3947 {
3948         switch (func_id) {
3949         case BPF_FUNC_skb_store_bytes:
3950                 return &bpf_skb_store_bytes_proto;
3951         case BPF_FUNC_skb_load_bytes:
3952                 return &bpf_skb_load_bytes_proto;
3953         case BPF_FUNC_skb_pull_data:
3954                 return &bpf_skb_pull_data_proto;
3955         case BPF_FUNC_skb_change_tail:
3956                 return &bpf_skb_change_tail_proto;
3957         case BPF_FUNC_skb_change_head:
3958                 return &bpf_skb_change_head_proto;
3959         case BPF_FUNC_get_socket_cookie:
3960                 return &bpf_get_socket_cookie_proto;
3961         case BPF_FUNC_get_socket_uid:
3962                 return &bpf_get_socket_uid_proto;
3963         case BPF_FUNC_sk_redirect_map:
3964                 return &bpf_sk_redirect_map_proto;
3965         default:
3966                 return bpf_base_func_proto(func_id);
3967         }
3968 }
3969 
3970 static const struct bpf_func_proto *
3971 lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
3972 {
3973         switch (func_id) {
3974         case BPF_FUNC_skb_get_tunnel_key:
3975                 return &bpf_skb_get_tunnel_key_proto;
3976         case BPF_FUNC_skb_set_tunnel_key:
3977                 return bpf_get_skb_set_tunnel_proto(func_id);
3978         case BPF_FUNC_skb_get_tunnel_opt:
3979                 return &bpf_skb_get_tunnel_opt_proto;
3980         case BPF_FUNC_skb_set_tunnel_opt:
3981                 return bpf_get_skb_set_tunnel_proto(func_id);
3982         case BPF_FUNC_redirect:
3983                 return &bpf_redirect_proto;
3984         case BPF_FUNC_clone_redirect:
3985                 return &bpf_clone_redirect_proto;
3986         case BPF_FUNC_skb_change_tail:
3987                 return &bpf_skb_change_tail_proto;
3988         case BPF_FUNC_skb_change_head:
3989                 return &bpf_skb_change_head_proto;
3990         case BPF_FUNC_skb_store_bytes:
3991                 return &bpf_skb_store_bytes_proto;
3992         case BPF_FUNC_csum_update:
3993                 return &bpf_csum_update_proto;
3994         case BPF_FUNC_l3_csum_replace:
3995                 return &bpf_l3_csum_replace_proto;
3996         case BPF_FUNC_l4_csum_replace:
3997                 return &bpf_l4_csum_replace_proto;
3998         case BPF_FUNC_set_hash_invalid:
3999                 return &bpf_set_hash_invalid_proto;
4000         default:
4001                 return lwt_inout_func_proto(func_id, prog);
4002         }
4003 }
4004 
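/* Common __sk_buff context-access check shared by the skb based
 * program types. It enforces in-bounds, size-aligned accesses, allows
 * only full 32-bit accesses to the pointer-carrying and address
 * fields, and permits narrow reads (but only full-width writes)
 * elsewhere.
 */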
4005 static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type,
4006                                     const struct bpf_prog *prog,
4007                                     struct bpf_insn_access_aux *info)
4008 {
4009         const int size_default = sizeof(__u32);
4010 
4011         if (off < 0 || off >= sizeof(struct __sk_buff))
4012                 return false;
4013 
4014         /* The verifier guarantees that size > 0. */
4015         if (off % size != 0)
4016                 return false;
4017 
4018         switch (off) {
4019         case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
4020                 if (off + size > offsetofend(struct __sk_buff, cb[4]))
4021                         return false;
4022                 break;
4023         case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]):
4024         case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]):
4025         case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4):
4026         case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4):
4027         case bpf_ctx_range(struct __sk_buff, data):
4028         case bpf_ctx_range(struct __sk_buff, data_meta):
4029         case bpf_ctx_range(struct __sk_buff, data_end):
4030                 if (size != size_default)
4031                         return false;
4032                 break;
4033         default:
4034                 /* Only narrow read access allowed for now. */
4035                 if (type == BPF_WRITE) {
4036                         if (size != size_default)
4037                                 return false;
4038                 } else {
4039                         bpf_ctx_record_field_size(info, size_default);
4040                         if (!bpf_ctx_narrow_access_ok(off, size, size_default))
4041                                 return false;
4042                 }
4043         }
4044 
4045         return true;
4046 }
4047 
4048 static bool sk_filter_is_valid_access(int off, int size,
4049                                       enum bpf_access_type type,
4050                                       const struct bpf_prog *prog,
4051                                       struct bpf_insn_access_aux *info)
4052 {
4053         switch (off) {
4054         case bpf_ctx_range(struct __sk_buff, tc_classid):
4055         case bpf_ctx_range(struct __sk_buff, data):
4056         case bpf_ctx_range(struct __sk_buff, data_meta):
4057         case bpf_ctx_range(struct __sk_buff, data_end):
4058         case bpf_ctx_range_till(struct __sk_buff, family, local_port):
4059                 return false;
4060         }
4061 
4062         if (type == BPF_WRITE) {
4063                 switch (off) {
4064                 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
4065                         break;
4066                 default:
4067                         return false;
4068                 }
4069         }
4070 
4071         return bpf_skb_is_valid_access(off, size, type, prog, info);
4072 }
4073 
4074 static bool lwt_is_valid_access(int off, int size,
4075                                 enum bpf_access_type type,
4076                                 const struct bpf_prog *prog,
4077                                 struct bpf_insn_access_aux *info)
4078 {
4079         switch (off) {
4080         case bpf_ctx_range(struct __sk_buff, tc_classid):
4081         case bpf_ctx_range_till(struct __sk_buff, family, local_port):
4082         case bpf_ctx_range(struct __sk_buff, data_meta):
4083                 return false;
4084         }
4085 
4086         if (type == BPF_WRITE) {
4087                 switch (off) {
4088                 case bpf_ctx_range(struct __sk_buff, mark):
4089                 case bpf_ctx_range(struct __sk_buff, priority):
4090                 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
4091                         break;
4092                 default:
4093                         return false;
4094                 }
4095         }
4096 
4097         switch (off) {
4098         case bpf_ctx_range(struct __sk_buff, data):
4099                 info->reg_type = PTR_TO_PACKET;
4100                 break;
4101         case bpf_ctx_range(struct __sk_buff, data_end):
4102                 info->reg_type = PTR_TO_PACKET_END;
4103                 break;
4104         }
4105 
4106         return bpf_skb_is_valid_access(off, size, type, prog, info);
4107 }
4108 
4109 
4110 /* Attach type specific accesses */
4111 static bool __sock_filter_check_attach_type(int off,
4112                                             enum bpf_access_type access_type,
4113                                             enum bpf_attach_type attach_type)
4114 {
4115         switch (off) {
4116         case offsetof(struct bpf_sock, bound_dev_if):
4117         case offsetof(struct bpf_sock, mark):
4118         case offsetof(struct bpf_sock, priority):
4119                 switch (attach_type) {
4120                 case BPF_CGROUP_INET_SOCK_CREATE:
4121                         goto full_access;
4122                 default:
4123                         return false;
4124                 }
4125         case bpf_ctx_range(struct bpf_sock, src_ip4):
4126                 switch (attach_type) {
4127                 case BPF_CGROUP_INET4_POST_BIND:
4128                         goto read_only;
4129                 default:
4130                         return false;
4131                 }
4132         case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4133                 switch (attach_type) {
4134                 case BPF_CGROUP_INET6_POST_BIND:
4135                         goto read_only;
4136                 default:
4137                         return false;
4138                 }
4139         case bpf_ctx_range(struct bpf_sock, src_port):
4140                 switch (attach_type) {
4141                 case BPF_CGROUP_INET4_POST_BIND:
4142                 case BPF_CGROUP_INET6_POST_BIND:
4143                         goto read_only;
4144                 default:
4145                         return false;
4146                 }
4147         }
4148 read_only:
4149         return access_type == BPF_READ;
4150 full_access:
4151         return true;
4152 }
4153 
4154 static bool __sock_filter_check_size(int off, int size,
4155                                      struct bpf_insn_access_aux *info)
4156 {
4157         const int size_default = sizeof(__u32);
4158 
4159         switch (off) {
4160         case bpf_ctx_range(struct bpf_sock, src_ip4):
4161         case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4162                 bpf_ctx_record_field_size(info, size_default);
4163                 return bpf_ctx_narrow_access_ok(off, size, size_default);
4164         }
4165 
4166         return size == size_default;
4167 }
4168 
4169 static bool sock_filter_is_valid_access(int off, int size,
4170                                         enum bpf_access_type type,
4171                                         const struct bpf_prog *prog,
4172                                         struct bpf_insn_access_aux *info)
4173 {
4174         if (off < 0 || off >= sizeof(struct bpf_sock))
4175                 return false;
4176         if (off % size != 0)
4177                 return false;
4178         if (!__sock_filter_check_attach_type(off, type,
4179                                              prog->expected_attach_type))
4180                 return false;
4181         if (!__sock_filter_check_size(off, size, info))
4182                 return false;
4183         return true;
4184 }
4185 
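/* When a program is allowed to write packet data directly, prepend a
 * small instruction sequence that un-clones the skb first: cloned skbs
 * get their data pulled via bpf_skb_pull_data(skb, 0), and if that
 * fails the program bails out with @drop_verdict (TC_ACT_SHOT or
 * SK_DROP) instead of running.
 */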
4186 static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
4187                                 const struct bpf_prog *prog, int drop_verdict)
4188 {
4189         struct bpf_insn *insn = insn_buf;
4190 
4191         if (!direct_write)
4192                 return 0;
4193 
4194         /* if (!skb->cloned)
4195          *       goto start;
4196          *
4197          * (Fast path. Otherwise, conservatively assume that we might be
4198          *  a clone and do the rest in the helper.)
4199          */
4200         *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET());
4201         *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK);
4202         *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7);
4203 
4204         /* ret = bpf_skb_pull_data(skb, 0); */
4205         *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1);
4206         *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2);
4207         *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
4208                                BPF_FUNC_skb_pull_data);
4209         /* if (!ret)
4210          *      goto restore;
4211          * return TC_ACT_SHOT;
4212          */
4213         *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2);
4214         *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict);
4215         *insn++ = BPF_EXIT_INSN();
4216 
4217         /* restore: */
4218         *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
4219         /* start: */
4220         *insn++ = prog->insnsi[0];
4221 
4222         return insn - insn_buf;
4223 }
4224 
4225 static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write,
4226                                const struct bpf_prog *prog)
4227 {
4228         return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT);
4229 }
4230 
4231 static bool tc_cls_act_is_valid_access(int off, int size,
4232                                        enum bpf_access_type type,
4233                                        const struct bpf_prog *prog,
4234                                        struct bpf_insn_access_aux *info)
4235 {
4236         if (type == BPF_WRITE) {
4237                 switch (off) {
4238                 case bpf_ctx_range(struct __sk_buff, mark):
4239                 case bpf_ctx_range(struct __sk_buff, tc_index):
4240                 case bpf_ctx_range(struct __sk_buff, priority):
4241                 case bpf_ctx_range(struct __sk_buff, tc_classid):
4242                 case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
4243                         break;
4244                 default:
4245                         return false;
4246                 }
4247         }
4248 
4249         switch (off) {
4250         case bpf_ctx_range(struct __sk_buff, data):
4251                 info->reg_type = PTR_TO_PACKET;
4252                 break;
4253         case bpf_ctx_range(struct __sk_buff, data_meta):
4254                 info->reg_type = PTR_TO_PACKET_META;
4255                 break;
4256         case bpf_ctx_range(struct __sk_buff, data_end):
4257                 info->reg_type = PTR_TO_PACKET_END;
4258                 break;
4259         case bpf_ctx_range_till(struct __sk_buff, family, local_port):
4260                 return false;
4261         }
4262 
4263         return bpf_skb_is_valid_access(off, size, type, prog, info);
4264 }
4265 
4266 static bool __is_valid_xdp_access(int off, int size)
4267 {
4268         if (off < 0 || off >= sizeof(struct xdp_md))
4269                 return false;
4270         if (off % size != 0)
4271                 return false;
4272         if (size != sizeof(__u32))
4273                 return false;
4274 
4275         return true;
4276 }
4277 
4278 static bool xdp_is_valid_access(int off, int size,
4279                                 enum bpf_access_type type,
4280                                 const struct bpf_prog *prog,
4281                                 struct bpf_insn_access_aux *info)
4282 {
4283         if (type == BPF_WRITE)
4284                 return false;
4285 
4286         switch (off) {
4287         case offsetof(struct xdp_md, data):
4288                 info->reg_type = PTR_TO_PACKET;
4289                 break;
4290         case offsetof(struct xdp_md, data_meta):
4291                 info->reg_type = PTR_TO_PACKET_META;
4292                 break;
4293         case offsetof(struct xdp_md, data_end):
4294                 info->reg_type = PTR_TO_PACKET_END;
4295                 break;
4296         }
4297 
4298         return __is_valid_xdp_access(off, size);
4299 }
4300 
4301 void bpf_warn_invalid_xdp_action(u32 act)
4302 {
4303         const u32 act_max = XDP_REDIRECT;
4304 
4305         WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n",
4306                   act > act_max ? "Illegal" : "Driver unsupported",
4307                   act);
4308 }
4309 EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action);
4310 
4311 static bool sock_addr_is_valid_access(int off, int size,
4312                                       enum bpf_access_type type,
4313                                       const struct bpf_prog *prog,
4314                                       struct bpf_insn_access_aux *info)
4315 {
4316         const int size_default = sizeof(__u32);
4317 
4318         if (off < 0 || off >= sizeof(struct bpf_sock_addr))
4319                 return false;
4320         if (off % size != 0)
4321                 return false;
4322 
4323         /* Disallow access to IPv6 fields from an IPv4 context and vice
4324          * versa.
4325          */
4326         switch (off) {
4327         case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4328                 switch (prog->expected_attach_type) {
4329                 case BPF_CGROUP_INET4_BIND:
4330                 case BPF_CGROUP_INET4_CONNECT:
4331                         break;
4332                 default:
4333                         return false;
4334                 }
4335                 break;
4336         case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4337                 switch (prog->expected_attach_type) {
4338                 case BPF_CGROUP_INET6_BIND:
4339                 case BPF_CGROUP_INET6_CONNECT:
4340                         break;
4341                 default:
4342                         return false;
4343                 }
4344                 break;
4345         }
4346 
4347         switch (off) {
4348         case bpf_ctx_range(struct bpf_sock_addr, user_ip4):
4349         case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
4350                 /* Only narrow read access allowed for now. */
4351                 if (type == BPF_READ) {
4352                         bpf_ctx_record_field_size(info, size_default);
4353                         if (!bpf_ctx_narrow_access_ok(off, size, size_default))
4354                                 return false;
4355                 } else {
4356                         if (size != size_default)
4357                                 return false;
4358                 }
4359                 break;
4360         case bpf_ctx_range(struct bpf_sock_addr, user_port):
4361                 if (size != size_default)
4362                         return false;
4363                 break;
4364         default:
4365                 if (type == BPF_READ) {
4366                         if (size != size_default)
4367                                 return false;
4368                 } else {
4369                         return false;
4370                 }
4371         }
4372 
4373         return true;
4374 }
4375 
4376 static bool sock_ops_is_valid_access(int off, int size,
4377                                      enum bpf_access_type type,
4378                                      const struct bpf_prog *prog,
4379                                      struct bpf_insn_access_aux *info)
4380 {
4381         const int size_default = sizeof(__u32);
4382 
4383         if (off < 0 || off >= sizeof(struct bpf_sock_ops))
4384                 return false;
4385 
4386         /* The verifier guarantees that size > 0. */
4387         if (off % size != 0)
4388                 return false;
4389 
4390         if (type == BPF_WRITE) {
4391                 switch (off) {
4392                 case offsetof(struct bpf_sock_ops, reply):
4393                 case offsetof(struct bpf_sock_ops, sk_txhash):
4394                         if (size != size_default)
4395                                 return false;
4396                         break;
4397                 default:
4398                         return false;
4399                 }
4400         } else {
4401                 switch (off) {
4402                 case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received,
4403                                         bytes_acked):
4404                         if (size != sizeof(__u64))
4405                                 return false;
4406                         break;
4407                 default:
4408                         if (size != size_default)
4409                                 return false;
4410                         break;
4411                 }
4412         }
4413 
4414         return true;
4415 }
4416 
4417 static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write,
4418                            const struct bpf_prog *prog)
4419 {
4420         return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP);
4421 }
4422 
4423 static bool sk_skb_is_valid_access(int off, int size,
4424                                    enum bpf_access_type type,
4425                                    const struct bpf_prog *prog,
4426                                    struct bpf_insn_access_aux *info)
4427 {
4428         switch (off) {
4429         case bpf_ctx_range(struct __sk_buff, tc_classid):
4430         case bpf_ctx_range(struct __sk_buff, data_meta):
4431                 return false;
4432         }
4433 
4434         if (type == BPF_WRITE) {
4435                 switch (off) {
4436                 case bpf_ctx_range(struct __sk_buff, tc_index):
4437                 case bpf_ctx_range(struct __sk_buff, priority):
4438                         break;
4439                 default:
4440                         return false;
4441                 }
4442         }
4443 
4444         switch (off) {
4445         case bpf_ctx_range(struct __sk_buff, mark):
4446                 return false;
4447         case bpf_ctx_range(struct __sk_buff, data):
4448                 info->reg_type = PTR_TO_PACKET;
4449                 break;
4450         case bpf_ctx_range(struct __sk_buff, data_end):
4451                 info->reg_type = PTR_TO_PACKET_END;
4452                 break;
4453         }
4454 
4455         return bpf_skb_is_valid_access(off, size, type, prog, info);
4456 }
4457 
4458 static bool sk_msg_is_valid_access(int off, int size,
4459                                    enum bpf_access_type type,
4460                                    const struct bpf_prog *prog,
4461                                    struct bpf_insn_access_aux *info)
4462 {
4463         if (type == BPF_WRITE)
4464                 return false;
4465 
4466         switch (off) {
4467         case offsetof(struct sk_msg_md, data):
4468                 info->reg_type = PTR_TO_PACKET;
4469                 break;
4470         case offsetof(struct sk_msg_md, data_end):
4471                 info->reg_type = PTR_TO_PACKET_END;
4472                 break;
4473         }
4474 
4475         if (off < 0 || off >= sizeof(struct sk_msg_md))
4476                 return false;
4477         if (off % size != 0)
4478                 return false;
4479         if (size != sizeof(__u64))
4480                 return false;
4481 
4482         return true;
4483 }
4484 
4485 static u32 bpf_convert_ctx_access(enum bpf_access_type type,
4486                                   const struct bpf_insn *si,
4487                                   struct bpf_insn *insn_buf,
4488                                   struct bpf_prog *prog, u32 *target_size)
4489 {
4490         struct bpf_insn *insn = insn_buf;
4491         int off;
4492 
4493         switch (si->off) {
4494         case offsetof(struct __sk_buff, len):
4495                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4496                                       bpf_target_off(struct sk_buff, len, 4,
4497                                                      target_size));
4498                 break;
4499 
4500         case offsetof(struct __sk_buff, protocol):
4501                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4502                                       bpf_target_off(struct sk_buff, protocol, 2,
4503                                                      target_size));
4504                 break;
4505 
4506         case offsetof(struct __sk_buff, vlan_proto):
4507                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4508                                       bpf_target_off(struct sk_buff, vlan_proto, 2,
4509                                                      target_size));
4510                 break;
4511 
4512         case offsetof(struct __sk_buff, priority):
4513                 if (type == BPF_WRITE)
4514                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4515                                               bpf_target_off(struct sk_buff, priority, 4,
4516                                                              target_size));
4517                 else
4518                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4519                                               bpf_target_off(struct sk_buff, priority, 4,
4520                                                              target_size));
4521                 break;
4522 
4523         case offsetof(struct __sk_buff, ingress_ifindex):
4524                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4525                                       bpf_target_off(struct sk_buff, skb_iif, 4,
4526                                                      target_size));
4527                 break;
4528 
4529         case offsetof(struct __sk_buff, ifindex):
4530                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
4531                                       si->dst_reg, si->src_reg,
4532                                       offsetof(struct sk_buff, dev));
4533                 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 1);
4534                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4535                                       bpf_target_off(struct net_device, ifindex, 4,
4536                                                      target_size));
4537                 break;
4538 
4539         case offsetof(struct __sk_buff, hash):
4540                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4541                                       bpf_target_off(struct sk_buff, hash, 4,
4542                                                      target_size));
4543                 break;
4544 
4545         case offsetof(struct __sk_buff, mark):
4546                 if (type == BPF_WRITE)
4547                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4548                                               bpf_target_off(struct sk_buff, mark, 4,
4549                                                              target_size));
4550                 else
4551                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4552                                               bpf_target_off(struct sk_buff, mark, 4,
4553                                                              target_size));
4554                 break;
4555 
4556         case offsetof(struct __sk_buff, pkt_type):
4557                 *target_size = 1;
4558                 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->src_reg,
4559                                       PKT_TYPE_OFFSET());
4560                 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, PKT_TYPE_MAX);
4561 #ifdef __BIG_ENDIAN_BITFIELD
4562                 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 5);
4563 #endif
4564                 break;
4565 
4566         case offsetof(struct __sk_buff, queue_mapping):
4567                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4568                                       bpf_target_off(struct sk_buff, queue_mapping, 2,
4569                                                      target_size));
4570                 break;
4571 
4572         case offsetof(struct __sk_buff, vlan_present):
4573         case offsetof(struct __sk_buff, vlan_tci):
4574                 BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
4575 
4576                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4577                                       bpf_target_off(struct sk_buff, vlan_tci, 2,
4578                                                      target_size));
4579                 if (si->off == offsetof(struct __sk_buff, vlan_tci)) {
4580                         *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg,
4581                                                 ~VLAN_TAG_PRESENT);
4582                 } else {
4583                         *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, 12);
4584                         *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, 1);
4585                 }
4586                 break;
4587 
4588         case offsetof(struct __sk_buff, cb[0]) ...
4589              offsetofend(struct __sk_buff, cb[4]) - 1:
4590                 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
4591                 BUILD_BUG_ON((offsetof(struct sk_buff, cb) +
4592                               offsetof(struct qdisc_skb_cb, data)) %
4593                              sizeof(__u64));
4594 
4595                 prog->cb_access = 1;
4596                 off  = si->off;
4597                 off -= offsetof(struct __sk_buff, cb[0]);
4598                 off += offsetof(struct sk_buff, cb);
4599                 off += offsetof(struct qdisc_skb_cb, data);
4600                 if (type == BPF_WRITE)
4601                         *insn++ = BPF_STX_MEM(BPF_SIZE(si->code), si->dst_reg,
4602                                               si->src_reg, off);
4603                 else
4604                         *insn++ = BPF_LDX_MEM(BPF_SIZE(si->code), si->dst_reg,
4605                                               si->src_reg, off);
4606                 break;
4607 
4608         case offsetof(struct __sk_buff, tc_classid):
4609                 BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, tc_classid) != 2);
4610 
4611                 off  = si->off;
4612                 off -= offsetof(struct __sk_buff, tc_classid);
4613                 off += offsetof(struct sk_buff, cb);
4614                 off += offsetof(struct qdisc_skb_cb, tc_classid);
4615                 *target_size = 2;
4616                 if (type == BPF_WRITE)
4617                         *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg,
4618                                               si->src_reg, off);
4619                 else
4620                         *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg,
4621                                               si->src_reg, off);
4622                 break;
4623 
4624         case offsetof(struct __sk_buff, data):
4625                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data),
4626                                       si->dst_reg, si->src_reg,
4627                                       offsetof(struct sk_buff, data));
4628                 break;
4629 
4630         case offsetof(struct __sk_buff, data_meta):
4631                 off  = si->off;
4632                 off -= offsetof(struct __sk_buff, data_meta);
4633                 off += offsetof(struct sk_buff, cb);
4634                 off += offsetof(struct bpf_skb_data_end, data_meta);
4635                 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4636                                       si->src_reg, off);
4637                 break;
4638 
4639         case offsetof(struct __sk_buff, data_end):
4640                 off  = si->off;
4641                 off -= offsetof(struct __sk_buff, data_end);
4642                 off += offsetof(struct sk_buff, cb);
4643                 off += offsetof(struct bpf_skb_data_end, data_end);
4644                 *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg,
4645                                       si->src_reg, off);
4646                 break;
4647 
4648         case offsetof(struct __sk_buff, tc_index):
4649 #ifdef CONFIG_NET_SCHED
4650                 if (type == BPF_WRITE)
4651                         *insn++ = BPF_STX_MEM(BPF_H, si->dst_reg, si->src_reg,
4652                                               bpf_target_off(struct sk_buff, tc_index, 2,
4653                                                              target_size));
4654                 else
4655                         *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4656                                               bpf_target_off(struct sk_buff, tc_index, 2,
4657                                                              target_size));
4658 #else
4659                 *target_size = 2;
4660                 if (type == BPF_WRITE)
4661                         *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
4662                 else
4663                         *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
4664 #endif
4665                 break;
4666 
4667         case offsetof(struct __sk_buff, napi_id):
4668 #if defined(CONFIG_NET_RX_BUSY_POLL)
4669                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4670                                       bpf_target_off(struct sk_buff, napi_id, 4,
4671                                                      target_size));
4672                 *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
4673                 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
4674 #else
4675                 *target_size = 4;
4676                 *insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
4677 #endif
4678                 break;
4679         case offsetof(struct __sk_buff, family):
4680                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
4681 
4682                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4683                                       si->dst_reg, si->src_reg,
4684                                       offsetof(struct sk_buff, sk));
4685                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4686                                       bpf_target_off(struct sock_common,
4687                                                      skc_family,
4688                                                      2, target_size));
4689                 break;
4690         case offsetof(struct __sk_buff, remote_ip4):
4691                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
4692 
4693                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4694                                       si->dst_reg, si->src_reg,
4695                                       offsetof(struct sk_buff, sk));
4696                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4697                                       bpf_target_off(struct sock_common,
4698                                                      skc_daddr,
4699                                                      4, target_size));
4700                 break;
4701         case offsetof(struct __sk_buff, local_ip4):
4702                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4703                                           skc_rcv_saddr) != 4);
4704 
4705                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4706                                       si->dst_reg, si->src_reg,
4707                                       offsetof(struct sk_buff, sk));
4708                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4709                                       bpf_target_off(struct sock_common,
4710                                                      skc_rcv_saddr,
4711                                                      4, target_size));
4712                 break;
4713         case offsetof(struct __sk_buff, remote_ip6[0]) ...
4714              offsetof(struct __sk_buff, remote_ip6[3]):
4715 #if IS_ENABLED(CONFIG_IPV6)
4716                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4717                                           skc_v6_daddr.s6_addr32[0]) != 4);
4718 
4719                 off = si->off;
4720                 off -= offsetof(struct __sk_buff, remote_ip6[0]);
4721 
4722                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4723                                       si->dst_reg, si->src_reg,
4724                                       offsetof(struct sk_buff, sk));
4725                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4726                                       offsetof(struct sock_common,
4727                                                skc_v6_daddr.s6_addr32[0]) +
4728                                       off);
4729 #else
4730                 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4731 #endif
4732                 break;
4733         case offsetof(struct __sk_buff, local_ip6[0]) ...
4734              offsetof(struct __sk_buff, local_ip6[3]):
4735 #if IS_ENABLED(CONFIG_IPV6)
4736                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
4737                                           skc_v6_rcv_saddr.s6_addr32[0]) != 4);
4738 
4739                 off = si->off;
4740                 off -= offsetof(struct __sk_buff, local_ip6[0]);
4741 
4742                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4743                                       si->dst_reg, si->src_reg,
4744                                       offsetof(struct sk_buff, sk));
4745                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4746                                       offsetof(struct sock_common,
4747                                                skc_v6_rcv_saddr.s6_addr32[0]) +
4748                                       off);
4749 #else
4750                 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4751 #endif
4752                 break;
4753 
4754         case offsetof(struct __sk_buff, remote_port):
4755                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
4756 
4757                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4758                                       si->dst_reg, si->src_reg,
4759                                       offsetof(struct sk_buff, sk));
4760                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4761                                       bpf_target_off(struct sock_common,
4762                                                      skc_dport,
4763                                                      2, target_size));
4764 #ifndef __BIG_ENDIAN_BITFIELD
4765                 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
4766 #endif
4767                 break;
4768 
4769         case offsetof(struct __sk_buff, local_port):
4770                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
4771 
4772                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk),
4773                                       si->dst_reg, si->src_reg,
4774                                       offsetof(struct sk_buff, sk));
4775                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
4776                                       bpf_target_off(struct sock_common,
4777                                                      skc_num, 2, target_size));
4778                 break;
4779         }
4780 
4781         return insn - insn_buf;
4782 }
4783 
4784 static u32 sock_filter_convert_ctx_access(enum bpf_access_type type,
4785                                           const struct bpf_insn *si,
4786                                           struct bpf_insn *insn_buf,
4787                                           struct bpf_prog *prog, u32 *target_size)
4788 {
4789         struct bpf_insn *insn = insn_buf;
4790         int off;
4791 
4792         switch (si->off) {
4793         case offsetof(struct bpf_sock, bound_dev_if):
4794                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4);
4795 
4796                 if (type == BPF_WRITE)
4797                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4798                                         offsetof(struct sock, sk_bound_dev_if));
4799                 else
4800                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4801                                       offsetof(struct sock, sk_bound_dev_if));
4802                 break;
4803 
4804         case offsetof(struct bpf_sock, mark):
4805                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4);
4806 
4807                 if (type == BPF_WRITE)
4808                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4809                                         offsetof(struct sock, sk_mark));
4810                 else
4811                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4812                                       offsetof(struct sock, sk_mark));
4813                 break;
4814 
4815         case offsetof(struct bpf_sock, priority):
4816                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4);
4817 
4818                 if (type == BPF_WRITE)
4819                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
4820                                         offsetof(struct sock, sk_priority));
4821                 else
4822                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4823                                       offsetof(struct sock, sk_priority));
4824                 break;
4825 
4826         case offsetof(struct bpf_sock, family):
4827                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2);
4828 
4829                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->src_reg,
4830                                       offsetof(struct sock, sk_family));
4831                 break;
4832 
4833         case offsetof(struct bpf_sock, type):
4834                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4835                                       offsetof(struct sock, __sk_flags_offset));
4836                 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
4837                 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
4838                 break;
4839 
4840         case offsetof(struct bpf_sock, protocol):
4841                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
4842                                       offsetof(struct sock, __sk_flags_offset));
4843                 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
4844                 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
4845                 break;
4846 
4847         case offsetof(struct bpf_sock, src_ip4):
4848                 *insn++ = BPF_LDX_MEM(
4849                         BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4850                         bpf_target_off(struct sock_common, skc_rcv_saddr,
4851                                        FIELD_SIZEOF(struct sock_common,
4852                                                     skc_rcv_saddr),
4853                                        target_size));
4854                 break;
4855 
4856         case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
4857 #if IS_ENABLED(CONFIG_IPV6)
4858                 off = si->off;
4859                 off -= offsetof(struct bpf_sock, src_ip6[0]);
4860                 *insn++ = BPF_LDX_MEM(
4861                         BPF_SIZE(si->code), si->dst_reg, si->src_reg,
4862                         bpf_target_off(
4863                                 struct sock_common,
4864                                 skc_v6_rcv_saddr.s6_addr32[0],
4865                                 FIELD_SIZEOF(struct sock_common,
4866                                              skc_v6_rcv_saddr.s6_addr32[0]),
4867                                 target_size) + off);
4868 #else
4869                 (void)off;
4870                 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
4871 #endif
4872                 break;
4873 
4874         case offsetof(struct bpf_sock, src_port):
4875                 *insn++ = BPF_LDX_MEM(
4876                         BPF_FIELD_SIZEOF(struct sock_common, skc_num),
4877                         si->dst_reg, si->src_reg,
4878                         bpf_target_off(struct sock_common, skc_num,
4879                                        FIELD_SIZEOF(struct sock_common,
4880                                                     skc_num),
4881                                        target_size));
4882                 break;
4883         }
4884 
4885         return insn - insn_buf;
4886 }
4887 
4888 static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type,
4889                                          const struct bpf_insn *si,
4890                                          struct bpf_insn *insn_buf,
4891                                          struct bpf_prog *prog, u32 *target_size)
4892 {
4893         struct bpf_insn *insn = insn_buf;
4894 
4895         switch (si->off) {
4896         case offsetof(struct __sk_buff, ifindex):
4897                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev),
4898                                       si->dst_reg, si->src_reg,
4899                                       offsetof(struct sk_buff, dev));
4900                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4901                                       bpf_target_off(struct net_device, ifindex, 4,
4902                                                      target_size));
4903                 break;
4904         default:
4905                 return bpf_convert_ctx_access(type, si, insn_buf, prog,
4906                                               target_size);
4907         }
4908 
4909         return insn - insn_buf;
4910 }
4911 
4912 static u32 xdp_convert_ctx_access(enum bpf_access_type type,
4913                                   const struct bpf_insn *si,
4914                                   struct bpf_insn *insn_buf,
4915                                   struct bpf_prog *prog, u32 *target_size)
4916 {
4917         struct bpf_insn *insn = insn_buf;
4918 
4919         switch (si->off) {
4920         case offsetof(struct xdp_md, data):
4921                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data),
4922                                       si->dst_reg, si->src_reg,
4923                                       offsetof(struct xdp_buff, data));
4924                 break;
4925         case offsetof(struct xdp_md, data_meta):
4926                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta),
4927                                       si->dst_reg, si->src_reg,
4928                                       offsetof(struct xdp_buff, data_meta));
4929                 break;
4930         case offsetof(struct xdp_md, data_end):
4931                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end),
4932                                       si->dst_reg, si->src_reg,
4933                                       offsetof(struct xdp_buff, data_end));
4934                 break;
4935         case offsetof(struct xdp_md, ingress_ifindex):
4936                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4937                                       si->dst_reg, si->src_reg,
4938                                       offsetof(struct xdp_buff, rxq));
4939                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
4940                                       si->dst_reg, si->dst_reg,
4941                                       offsetof(struct xdp_rxq_info, dev));
4942                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4943                                       offsetof(struct net_device, ifindex));
4944                 break;
4945         case offsetof(struct xdp_md, rx_queue_index):
4946                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
4947                                       si->dst_reg, si->src_reg,
4948                                       offsetof(struct xdp_buff, rxq));
4949                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
4950                                       offsetof(struct xdp_rxq_info,
4951                                                queue_index));
4952                 break;
4953         }
4954 
4955         return insn - insn_buf;
4956 }
4957 
4958 /* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is the type
4959  * of the context Structure, F is the Field in the context structure that
4960  * contains a pointer to the Nested Structure of type NS that has the field NF.
4961  *
4962  * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to the caller to make
4963  * sure that SIZE is not greater than the actual size of S.F.NF.
4964  *
4965  * If an offset OFF is provided, the load happens from that offset relative to
4966  * the offset of NF.
4967  */
4968 #define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF)          \
4969         do {                                                                   \
4970                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg,     \
4971                                       si->src_reg, offsetof(S, F));            \
4972                 *insn++ = BPF_LDX_MEM(                                         \
4973                         SIZE, si->dst_reg, si->dst_reg,                        \
4974                         bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),           \
4975                                        target_size)                            \
4976                                 + OFF);                                        \
4977         } while (0)
4978 
4979 #define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF)                              \
4980         SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF,                     \
4981                                              BPF_FIELD_SIZEOF(NS, NF), 0)
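
/* A minimal sketch (not compiled) of what the load helper expands to for the
 * user_family case handled further below, i.e.
 * SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, struct sockaddr,
 *			       uaddr, sa_family):
 *
 *	// 1) dst_reg = ctx->uaddr	(pointer to the nested struct)
 *	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_addr_kern, uaddr),
 *			      si->dst_reg, si->src_reg,
 *			      offsetof(struct bpf_sock_addr_kern, uaddr));
 *	// 2) dst_reg = ((struct sockaddr *)dst_reg)->sa_family
 *	*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sockaddr, sa_family),
 *			      si->dst_reg, si->dst_reg,
 *			      bpf_target_off(struct sockaddr, sa_family,
 *					     FIELD_SIZEOF(struct sockaddr,
 *							  sa_family),
 *					     target_size));
 */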
4982 
4983 /* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantics similar to
4984  * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for a store operation.
4985  *
4986  * It doesn't support a SIZE argument though, since narrow stores are not
4987  * supported for now.
4988  *
4989  * In addition it uses Temporary Field TF (a member of struct S) as the 3rd
4990  * "register", since the two registers available in convert_ctx_access are not
4991  * enough: we can't override SRC, since it contains the value to store, nor
4992  * DST, since it contains the pointer to the context that may be used by later
4993  * instructions. But we need a temporary place to save the pointer to the
4994  * nested structure whose field we want to store to.
4995  */
4996 #define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF)                \
4997         do {                                                                   \
4998                 int tmp_reg = BPF_REG_9;                                       \
4999                 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)          \
5000                         --tmp_reg;                                             \
5001                 if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg)          \
5002                         --tmp_reg;                                             \
5003                 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg,            \
5004                                       offsetof(S, TF));                        \
5005                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg,         \
5006                                       si->dst_reg, offsetof(S, F));            \
5007                 *insn++ = BPF_STX_MEM(                                         \
5008                         BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg,        \
5009                         bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF),           \
5010                                        target_size)                            \
5011                                 + OFF);                                        \
5012                 *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg,            \
5013                                       offsetof(S, TF));                        \
5014         } while (0)
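
/* A minimal sketch, in pseudo-C, of the store sequence emitted above (TMP is
 * the borrowed scratch register, BPF_REG_9 or lower, chosen so it clashes with
 * neither SRC nor DST; ctx is the struct S pointer held in DST):
 *
 *	ctx->TF = TMP;		// spill TMP into the temporary ctx field
 *	TMP = ctx->F;		// TMP now points to the nested struct NS
 *	TMP->NF = SRC;		// store the value (at the offset of NF, + OFF)
 *	TMP = ctx->TF;		// restore TMP from the temporary ctx field
 */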
5015 
5016 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \
5017                                                       TF)                      \
5018         do {                                                                   \
5019                 if (type == BPF_WRITE) {                                       \
5020                         SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF,    \
5021                                                          TF);                  \
5022                 } else {                                                       \
5023                         SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(                  \
5024                                 S, NS, F, NF, SIZE, OFF);  \
5025                 }                                                              \
5026         } while (0)
5027 
5028 #define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF)                 \
5029         SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(                         \
5030                 S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF)
5031 
5032 static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
5033                                         const struct bpf_insn *si,
5034                                         struct bpf_insn *insn_buf,
5035                                         struct bpf_prog *prog, u32 *target_size)
5036 {
5037         struct bpf_insn *insn = insn_buf;
5038         int off;
5039 
5040         switch (si->off) {
5041         case offsetof(struct bpf_sock_addr, user_family):
5042                 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5043                                             struct sockaddr, uaddr, sa_family);
5044                 break;
5045 
5046         case offsetof(struct bpf_sock_addr, user_ip4):
5047                 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5048                         struct bpf_sock_addr_kern, struct sockaddr_in, uaddr,
5049                         sin_addr, BPF_SIZE(si->code), 0, tmp_reg);
5050                 break;
5051 
5052         case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]):
5053                 off = si->off;
5054                 off -= offsetof(struct bpf_sock_addr, user_ip6[0]);
5055                 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
5056                         struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
5057                         sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off,
5058                         tmp_reg);
5059                 break;
5060 
5061         case offsetof(struct bpf_sock_addr, user_port):
5062                 /* To get port we need to know sa_family first and then treat
5063                  * sockaddr as either sockaddr_in or sockaddr_in6.
5064                  * Though we can simplify since the port field has the same
5065                  * offset and size in both structures.
5066                  * Here we check this invariant and use just one of the
5067                  * structures if it holds.
5068                  */
5069                 BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) !=
5070                              offsetof(struct sockaddr_in6, sin6_port));
5071                 BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) !=
5072                              FIELD_SIZEOF(struct sockaddr_in6, sin6_port));
5073                 SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
5074                                                      struct sockaddr_in6, uaddr,
5075                                                      sin6_port, tmp_reg);
5076                 break;
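
                /* For reference, the invariant asserted above comes from the
                 * UAPI layouts (sketched here, not the full definitions):
                 *
                 *	struct sockaddr_in  { __kernel_sa_family_t sin_family;
                 *			      __be16 sin_port; ... };
                 *	struct sockaddr_in6 { unsigned short int sin6_family;
                 *			      __be16 sin6_port; ... };
                 *
                 * In both, the 2-byte port immediately follows the 2-byte
                 * family field, so either structure can be used here.
                 */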
5077 
5078         case offsetof(struct bpf_sock_addr, family):
5079                 SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
5080                                             struct sock, sk, sk_family);
5081                 break;
5082 
5083         case offsetof(struct bpf_sock_addr, type):
5084                 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5085                         struct bpf_sock_addr_kern, struct sock, sk,
5086                         __sk_flags_offset, BPF_W, 0);
5087                 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
5088                 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
5089                 break;
5090 
5091         case offsetof(struct bpf_sock_addr, protocol):
5092                 SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
5093                         struct bpf_sock_addr_kern, struct sock, sk,
5094                         __sk_flags_offset, BPF_W, 0);
5095                 *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
5096                 *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
5097                                         SK_FL_PROTO_SHIFT);
5098                 break;
5099         }
5100 
5101         return insn - insn_buf;
5102 }
5103 
5104 static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
5105                                        const struct bpf_insn *si,
5106                                        struct bpf_insn *insn_buf,
5107                                        struct bpf_prog *prog,
5108                                        u32 *target_size)
5109 {
5110         struct bpf_insn *insn = insn_buf;
5111         int off;
5112 
5113         switch (si->off) {
5114         case offsetof(struct bpf_sock_ops, op) ...
5115              offsetof(struct bpf_sock_ops, replylong[3]):
5116                 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, op) !=
5117                              FIELD_SIZEOF(struct bpf_sock_ops_kern, op));
5118                 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, reply) !=
5119                              FIELD_SIZEOF(struct bpf_sock_ops_kern, reply));
5120                 BUILD_BUG_ON(FIELD_SIZEOF(struct bpf_sock_ops, replylong) !=
5121                              FIELD_SIZEOF(struct bpf_sock_ops_kern, replylong));
5122                 off = si->off;
5123                 off -= offsetof(struct bpf_sock_ops, op);
5124                 off += offsetof(struct bpf_sock_ops_kern, op);
5125                 if (type == BPF_WRITE)
5126                         *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg,
5127                                               off);
5128                 else
5129                         *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
5130                                               off);
5131                 break;
5132 
5133         case offsetof(struct bpf_sock_ops, family):
5134                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2);
5135 
5136                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5137                                               struct bpf_sock_ops_kern, sk),
5138                                       si->dst_reg, si->src_reg,
5139                                       offsetof(struct bpf_sock_ops_kern, sk));
5140                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5141                                       offsetof(struct sock_common, skc_family));
5142                 break;
5143 
5144         case offsetof(struct bpf_sock_ops, remote_ip4):
5145                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4);
5146 
5147                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5148                                                 struct bpf_sock_ops_kern, sk),
5149                                       si->dst_reg, si->src_reg,
5150                                       offsetof(struct bpf_sock_ops_kern, sk));
5151                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5152                                       offsetof(struct sock_common, skc_daddr));
5153                 break;
5154 
5155         case offsetof(struct bpf_sock_ops, local_ip4):
5156                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_rcv_saddr) != 4);
5157 
5158                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5159                                               struct bpf_sock_ops_kern, sk),
5160                                       si->dst_reg, si->src_reg,
5161                                       offsetof(struct bpf_sock_ops_kern, sk));
5162                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5163                                       offsetof(struct sock_common,
5164                                                skc_rcv_saddr));
5165                 break;
5166 
5167         case offsetof(struct bpf_sock_ops, remote_ip6[0]) ...
5168              offsetof(struct bpf_sock_ops, remote_ip6[3]):
5169 #if IS_ENABLED(CONFIG_IPV6)
5170                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
5171                                           skc_v6_daddr.s6_addr32[0]) != 4);
5172 
5173                 off = si->off;
5174                 off -= offsetof(struct bpf_sock_ops, remote_ip6[0]);
5175                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5176                                                 struct bpf_sock_ops_kern, sk),
5177                                       si->dst_reg, si->src_reg,
5178                                       offsetof(struct bpf_sock_ops_kern, sk));
5179                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5180                                       offsetof(struct sock_common,
5181                                                skc_v6_daddr.s6_addr32[0]) +
5182                                       off);
5183 #else
5184                 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
5185 #endif
5186                 break;
5187 
5188         case offsetof(struct bpf_sock_ops, local_ip6[0]) ...
5189              offsetof(struct bpf_sock_ops, local_ip6[3]):
5190 #if IS_ENABLED(CONFIG_IPV6)
5191                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common,
5192                                           skc_v6_rcv_saddr.s6_addr32[0]) != 4);
5193 
5194                 off = si->off;
5195                 off -= offsetof(struct bpf_sock_ops, local_ip6[0]);
5196                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5197                                                 struct bpf_sock_ops_kern, sk),
5198                                       si->dst_reg, si->src_reg,
5199                                       offsetof(struct bpf_sock_ops_kern, sk));
5200                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5201                                       offsetof(struct sock_common,
5202                                                skc_v6_rcv_saddr.s6_addr32[0]) +
5203                                       off);
5204 #else
5205                 *insn++ = BPF_MOV32_IMM(si->dst_reg, 0);
5206 #endif
5207                 break;
5208 
5209         case offsetof(struct bpf_sock_ops, remote_port):
5210                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2);
5211 
5212                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5213                                                 struct bpf_sock_ops_kern, sk),
5214                                       si->dst_reg, si->src_reg,
5215                                       offsetof(struct bpf_sock_ops_kern, sk));
5216                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5217                                       offsetof(struct sock_common, skc_dport));
5218 #ifndef __BIG_ENDIAN_BITFIELD
5219                 *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16);
5220 #endif
5221                 break;
5222 
5223         case offsetof(struct bpf_sock_ops, local_port):
5224                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2);
5225 
5226                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5227                                                 struct bpf_sock_ops_kern, sk),
5228                                       si->dst_reg, si->src_reg,
5229                                       offsetof(struct bpf_sock_ops_kern, sk));
5230                 *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg,
5231                                       offsetof(struct sock_common, skc_num));
5232                 break;
5233 
5234         case offsetof(struct bpf_sock_ops, is_fullsock):
5235                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5236                                                 struct bpf_sock_ops_kern,
5237                                                 is_fullsock),
5238                                       si->dst_reg, si->src_reg,
5239                                       offsetof(struct bpf_sock_ops_kern,
5240                                                is_fullsock));
5241                 break;
5242 
5243         case offsetof(struct bpf_sock_ops, state):
5244                 BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_state) != 1);
5245 
5246                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5247                                                 struct bpf_sock_ops_kern, sk),
5248                                       si->dst_reg, si->src_reg,
5249                                       offsetof(struct bpf_sock_ops_kern, sk));
5250                 *insn++ = BPF_LDX_MEM(BPF_B, si->dst_reg, si->dst_reg,
5251                                       offsetof(struct sock_common, skc_state));
5252                 break;
5253 
5254         case offsetof(struct bpf_sock_ops, rtt_min):
5255                 BUILD_BUG_ON(FIELD_SIZEOF(struct tcp_sock, rtt_min) !=
5256                              sizeof(struct minmax));
5257                 BUILD_BUG_ON(sizeof(struct minmax) <
5258                              sizeof(struct minmax_sample));
5259 
5260                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(
5261                                                 struct bpf_sock_ops_kern, sk),
5262                                       si->dst_reg, si->src_reg,
5263                                       offsetof(struct bpf_sock_ops_kern, sk));
5264                 *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
5265                                       offsetof(struct tcp_sock, rtt_min) +
5266                                       FIELD_SIZEOF(struct minmax_sample, t));
5267                 break;
5268 
5269 /* Helper macro for adding read access to tcp_sock or sock fields. */
5270 #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                         \
5271         do {                                                                  \
5272                 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
5273                              FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
5274                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
5275                                                 struct bpf_sock_ops_kern,     \
5276                                                 is_fullsock),                 \
5277                                       si->dst_reg, si->src_reg,               \
5278                                       offsetof(struct bpf_sock_ops_kern,      \
5279                                                is_fullsock));                 \
5280                 *insn++ = BPF_JMP_IMM(BPF_JEQ, si->dst_reg, 0, 2);            \
5281                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
5282                                                 struct bpf_sock_ops_kern, sk),\
5283                                       si->dst_reg, si->src_reg,               \
5284                                       offsetof(struct bpf_sock_ops_kern, sk));\
5285                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(OBJ,                   \
5286                                                        OBJ_FIELD),            \
5287                                       si->dst_reg, si->dst_reg,               \
5288                                       offsetof(OBJ, OBJ_FIELD));              \
5289         } while (0)
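
/* A minimal sketch, in pseudo-C, of the read sequence SOCK_OPS_GET_FIELD()
 * emits, using the snd_cwnd case further below as the example, i.e.
 * SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock); ctx is the
 * struct bpf_sock_ops_kern pointer held in src_reg:
 *
 *	dst_reg = ctx->is_fullsock;
 *	if (dst_reg == 0)			// not a full socket:
 *		goto out;			// result stays 0
 *	dst_reg = ctx->sk;
 *	dst_reg = ((struct tcp_sock *)dst_reg)->snd_cwnd;
 * out:
 */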
5290 
5291 /* Helper macro for adding write access to tcp_sock or sock fields.
5292  * The macro is called with two registers, dst_reg which contains a pointer
5293  * to ctx (context) and src_reg which contains the value that should be
5294  * stored. However, we need an additional register since we cannot overwrite
5295  * dst_reg because it may be used later in the program.
5296  * Instead we "borrow" one of the other registers. We first save its value
5297  * into a new (temp) field in bpf_sock_ops_kern, use it, and then restore
5298  * it at the end of the macro.
5299  */
5300 #define SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ)                         \
5301         do {                                                                  \
5302                 int reg = BPF_REG_9;                                          \
5303                 BUILD_BUG_ON(FIELD_SIZEOF(OBJ, OBJ_FIELD) >                   \
5304                              FIELD_SIZEOF(struct bpf_sock_ops, BPF_FIELD));   \
5305                 if (si->dst_reg == reg || si->src_reg == reg)                 \
5306                         reg--;                                                \
5307                 if (si->dst_reg == reg || si->src_reg == reg)                 \
5308                         reg--;                                                \
5309                 *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, reg,               \
5310                                       offsetof(struct bpf_sock_ops_kern,      \
5311                                                temp));                        \
5312                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
5313                                                 struct bpf_sock_ops_kern,     \
5314                                                 is_fullsock),                 \
5315                                       reg, si->dst_reg,                       \
5316                                       offsetof(struct bpf_sock_ops_kern,      \
5317                                                is_fullsock));                 \
5318                 *insn++ = BPF_JMP_IMM(BPF_JEQ, reg, 0, 2);                    \
5319                 *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(                       \
5320                                                 struct bpf_sock_ops_kern, sk),\
5321                                       reg, si->dst_reg,                       \
5322                                       offsetof(struct bpf_sock_ops_kern, sk));\
5323                 *insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(OBJ, OBJ_FIELD),       \
5324                                       reg, si->src_reg,                       \
5325                                       offsetof(OBJ, OBJ_FIELD));              \
5326                 *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->dst_reg,               \
5327                                       offsetof(struct bpf_sock_ops_kern,      \
5328                                                temp));                        \
5329         } while (0)
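
/* A minimal sketch, in pseudo-C, of the write sequence SOCK_OPS_SET_FIELD()
 * emits (reg is the borrowed scratch register saved in and restored from the
 * temp field of struct bpf_sock_ops_kern; ctx is the pointer in dst_reg):
 *
 *	ctx->temp = reg;			// save the borrowed register
 *	reg = ctx->is_fullsock;
 *	if (reg != 0) {				// only full sockets are written
 *		reg = ctx->sk;
 *		((OBJ *)reg)->OBJ_FIELD = src_reg;
 *	}
 *	reg = ctx->temp;			// restore the borrowed register
 */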
5330 
5331 #define SOCK_OPS_GET_OR_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ, TYPE)            \
5332         do {                                                                  \
5333                 if (TYPE == BPF_WRITE)                                        \
5334                         SOCK_OPS_SET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
5335                 else                                                          \
5336                         SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ);        \
5337         } while (0)
5338 
5339         case offsetof(struct bpf_sock_ops, snd_cwnd):
5340                 SOCK_OPS_GET_FIELD(snd_cwnd, snd_cwnd, struct tcp_sock);
5341                 break;
5342 
5343         case offsetof(struct bpf_sock_ops, srtt_us):
5344                 SOCK_OPS_GET_FIELD(srtt_us, srtt_us, struct tcp_sock);
5345                 break;
5346 
5347         case offsetof(struct bpf_sock_ops, bpf_sock_ops_cb_flags):
5348                 SOCK_OPS_GET_FIELD(bpf_sock_ops_cb_flags, bpf_sock_ops_cb_flags,
5349                                    struct tcp_sock);
5350                 break;
5351 
5352         case offsetof(struct bpf_sock_ops, snd_ssthresh):
5353                 SOCK_OPS_GET_FIELD(snd_ssthresh, snd_ssthresh, struct tcp_sock);
5354                 break;
5355 
5356         case offsetof(struct bpf_sock_ops, rcv_nxt):
5357                 SOCK_OPS_GET_FIELD(rcv_nxt, rcv_nxt, struct tcp_sock);
5358                 break;
5359 
5360         case offsetof(struct bpf_sock_ops, snd_nxt):
5361                 SOCK_OPS_GET_FIELD(snd_nxt, snd_nxt, struct tcp_sock);
5362                 break;
5363 
5364         case offsetof(struct bpf_sock_ops, snd_una):
5365                 SOCK_OPS_GET_FIELD(snd_una, snd_una, struct tcp_sock);
5366                 break;
5367 
5368         case offsetof(struct bpf_sock_ops, mss_cache):
5369                 SOCK_OPS_GET_FIELD(mss_cache, mss_cache, struct tcp_sock);
5370                 break;
5371 
5372         case offsetof(struct bpf_sock_ops, ecn_flags):
5373                 SOCK_OPS_GET_FIELD(ecn_flags, ecn_flags, struct tcp_sock);
5374                 break;
5375 
5376         case offsetof(struct bpf_sock_ops, rate_delivered):
5377                 SOCK_OPS_GET_FIELD(rate_delivered, rate_delivered,
5378                                    struct tcp_sock);
5379                 break;
5380 
5381         case