~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_sockglue.c

Version: ~ [ linux-5.18-rc6 ] ~ [ linux-5.17.6 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.38 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.114 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.192 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.241 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.277 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.312 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.302 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 /*
  3  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  4  *              operating system.  INET is implemented using the  BSD Socket
  5  *              interface as the means of communication with the user level.
  6  *
  7  *              The IP to API glue.
  8  *
  9  * Authors:     see ip.c
 10  *
 11  * Fixes:
 12  *              Many            :       Split from ip.c , see ip.c for history.
 13  *              Martin Mares    :       TOS setting fixed.
 14  *              Alan Cox        :       Fixed a couple of oopses in Martin's
 15  *                                      TOS tweaks.
 16  *              Mike McLagan    :       Routing by source
 17  */
 18 
 19 #include <linux/module.h>
 20 #include <linux/types.h>
 21 #include <linux/mm.h>
 22 #include <linux/skbuff.h>
 23 #include <linux/ip.h>
 24 #include <linux/icmp.h>
 25 #include <linux/inetdevice.h>
 26 #include <linux/netdevice.h>
 27 #include <linux/slab.h>
 28 #include <net/sock.h>
 29 #include <net/ip.h>
 30 #include <net/icmp.h>
 31 #include <net/tcp_states.h>
 32 #include <linux/udp.h>
 33 #include <linux/igmp.h>
 34 #include <linux/netfilter.h>
 35 #include <linux/route.h>
 36 #include <linux/mroute.h>
 37 #include <net/inet_ecn.h>
 38 #include <net/route.h>
 39 #include <net/xfrm.h>
 40 #include <net/compat.h>
 41 #include <net/checksum.h>
 42 #if IS_ENABLED(CONFIG_IPV6)
 43 #include <net/transp_v6.h>
 44 #endif
 45 #include <net/ip_fib.h>
 46 
 47 #include <linux/errqueue.h>
 48 #include <linux/uaccess.h>
 49 
 50 #include <linux/bpfilter.h>
 51 
 52 /*
 53  *      SOL_IP control messages.
 54  */
 55 
 56 static void ip_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 57 {
 58         struct in_pktinfo info = *PKTINFO_SKB_CB(skb);
 59 
 60         info.ipi_addr.s_addr = ip_hdr(skb)->daddr;
 61 
 62         put_cmsg(msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
 63 }
 64 
 65 static void ip_cmsg_recv_ttl(struct msghdr *msg, struct sk_buff *skb)
 66 {
 67         int ttl = ip_hdr(skb)->ttl;
 68         put_cmsg(msg, SOL_IP, IP_TTL, sizeof(int), &ttl);
 69 }
 70 
 71 static void ip_cmsg_recv_tos(struct msghdr *msg, struct sk_buff *skb)
 72 {
 73         put_cmsg(msg, SOL_IP, IP_TOS, 1, &ip_hdr(skb)->tos);
 74 }
 75 
 76 static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb)
 77 {
 78         if (IPCB(skb)->opt.optlen == 0)
 79                 return;
 80 
 81         put_cmsg(msg, SOL_IP, IP_RECVOPTS, IPCB(skb)->opt.optlen,
 82                  ip_hdr(skb) + 1);
 83 }
 84 
 85 
 86 static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg,
 87                                  struct sk_buff *skb)
 88 {
 89         unsigned char optbuf[sizeof(struct ip_options) + 40];
 90         struct ip_options *opt = (struct ip_options *)optbuf;
 91 
 92         if (IPCB(skb)->opt.optlen == 0)
 93                 return;
 94 
 95         if (ip_options_echo(net, opt, skb)) {
 96                 msg->msg_flags |= MSG_CTRUNC;
 97                 return;
 98         }
 99         ip_options_undo(opt);
100 
101         put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data);
102 }
103 
104 static void ip_cmsg_recv_fragsize(struct msghdr *msg, struct sk_buff *skb)
105 {
106         int val;
107 
108         if (IPCB(skb)->frag_max_size == 0)
109                 return;
110 
111         val = IPCB(skb)->frag_max_size;
112         put_cmsg(msg, SOL_IP, IP_RECVFRAGSIZE, sizeof(val), &val);
113 }
114 
115 static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb,
116                                   int tlen, int offset)
117 {
118         __wsum csum = skb->csum;
119 
120         if (skb->ip_summed != CHECKSUM_COMPLETE)
121                 return;
122 
123         if (offset != 0) {
124                 int tend_off = skb_transport_offset(skb) + tlen;
125                 csum = csum_sub(csum, skb_checksum(skb, tend_off, offset, 0));
126         }
127 
128         put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum);
129 }
130 
131 static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb)
132 {
133         char *secdata;
134         u32 seclen, secid;
135         int err;
136 
137         err = security_socket_getpeersec_dgram(NULL, skb, &secid);
138         if (err)
139                 return;
140 
141         err = security_secid_to_secctx(secid, &secdata, &seclen);
142         if (err)
143                 return;
144 
145         put_cmsg(msg, SOL_IP, SCM_SECURITY, seclen, secdata);
146         security_release_secctx(secdata, seclen);
147 }
148 
149 static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
150 {
151         __be16 _ports[2], *ports;
152         struct sockaddr_in sin;
153 
154         /* All current transport protocols have the port numbers in the
155          * first four bytes of the transport header and this function is
156          * written with this assumption in mind.
157          */
158         ports = skb_header_pointer(skb, skb_transport_offset(skb),
159                                    sizeof(_ports), &_ports);
160         if (!ports)
161                 return;
162 
163         sin.sin_family = AF_INET;
164         sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
165         sin.sin_port = ports[1];
166         memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
167 
168         put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
169 }
170 
171 void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
172                          struct sk_buff *skb, int tlen, int offset)
173 {
174         struct inet_sock *inet = inet_sk(sk);
175         unsigned int flags = inet->cmsg_flags;
176 
177         /* Ordered by supposed usage frequency */
178         if (flags & IP_CMSG_PKTINFO) {
179                 ip_cmsg_recv_pktinfo(msg, skb);
180 
181                 flags &= ~IP_CMSG_PKTINFO;
182                 if (!flags)
183                         return;
184         }
185 
186         if (flags & IP_CMSG_TTL) {
187                 ip_cmsg_recv_ttl(msg, skb);
188 
189                 flags &= ~IP_CMSG_TTL;
190                 if (!flags)
191                         return;
192         }
193 
194         if (flags & IP_CMSG_TOS) {
195                 ip_cmsg_recv_tos(msg, skb);
196 
197                 flags &= ~IP_CMSG_TOS;
198                 if (!flags)
199                         return;
200         }
201 
202         if (flags & IP_CMSG_RECVOPTS) {
203                 ip_cmsg_recv_opts(msg, skb);
204 
205                 flags &= ~IP_CMSG_RECVOPTS;
206                 if (!flags)
207                         return;
208         }
209 
210         if (flags & IP_CMSG_RETOPTS) {
211                 ip_cmsg_recv_retopts(sock_net(sk), msg, skb);
212 
213                 flags &= ~IP_CMSG_RETOPTS;
214                 if (!flags)
215                         return;
216         }
217 
218         if (flags & IP_CMSG_PASSSEC) {
219                 ip_cmsg_recv_security(msg, skb);
220 
221                 flags &= ~IP_CMSG_PASSSEC;
222                 if (!flags)
223                         return;
224         }
225 
226         if (flags & IP_CMSG_ORIGDSTADDR) {
227                 ip_cmsg_recv_dstaddr(msg, skb);
228 
229                 flags &= ~IP_CMSG_ORIGDSTADDR;
230                 if (!flags)
231                         return;
232         }
233 
234         if (flags & IP_CMSG_CHECKSUM)
235                 ip_cmsg_recv_checksum(msg, skb, tlen, offset);
236 
237         if (flags & IP_CMSG_RECVFRAGSIZE)
238                 ip_cmsg_recv_fragsize(msg, skb);
239 }
240 EXPORT_SYMBOL(ip_cmsg_recv_offset);
241 
242 int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc,
243                  bool allow_ipv6)
244 {
245         int err, val;
246         struct cmsghdr *cmsg;
247         struct net *net = sock_net(sk);
248 
249         for_each_cmsghdr(cmsg, msg) {
250                 if (!CMSG_OK(msg, cmsg))
251                         return -EINVAL;
252 #if IS_ENABLED(CONFIG_IPV6)
253                 if (allow_ipv6 &&
254                     cmsg->cmsg_level == SOL_IPV6 &&
255                     cmsg->cmsg_type == IPV6_PKTINFO) {
256                         struct in6_pktinfo *src_info;
257 
258                         if (cmsg->cmsg_len < CMSG_LEN(sizeof(*src_info)))
259                                 return -EINVAL;
260                         src_info = (struct in6_pktinfo *)CMSG_DATA(cmsg);
261                         if (!ipv6_addr_v4mapped(&src_info->ipi6_addr))
262                                 return -EINVAL;
263                         if (src_info->ipi6_ifindex)
264                                 ipc->oif = src_info->ipi6_ifindex;
265                         ipc->addr = src_info->ipi6_addr.s6_addr32[3];
266                         continue;
267                 }
268 #endif
269                 if (cmsg->cmsg_level == SOL_SOCKET) {
270                         err = __sock_cmsg_send(sk, msg, cmsg, &ipc->sockc);
271                         if (err)
272                                 return err;
273                         continue;
274                 }
275 
276                 if (cmsg->cmsg_level != SOL_IP)
277                         continue;
278                 switch (cmsg->cmsg_type) {
279                 case IP_RETOPTS:
280                         err = cmsg->cmsg_len - sizeof(struct cmsghdr);
281 
282                         /* Our caller is responsible for freeing ipc->opt */
283                         err = ip_options_get(net, &ipc->opt, CMSG_DATA(cmsg),
284                                              err < 40 ? err : 40);
285                         if (err)
286                                 return err;
287                         break;
288                 case IP_PKTINFO:
289                 {
290                         struct in_pktinfo *info;
291                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(struct in_pktinfo)))
292                                 return -EINVAL;
293                         info = (struct in_pktinfo *)CMSG_DATA(cmsg);
294                         if (info->ipi_ifindex)
295                                 ipc->oif = info->ipi_ifindex;
296                         ipc->addr = info->ipi_spec_dst.s_addr;
297                         break;
298                 }
299                 case IP_TTL:
300                         if (cmsg->cmsg_len != CMSG_LEN(sizeof(int)))
301                                 return -EINVAL;
302                         val = *(int *)CMSG_DATA(cmsg);
303                         if (val < 1 || val > 255)
304                                 return -EINVAL;
305                         ipc->ttl = val;
306                         break;
307                 case IP_TOS:
308                         if (cmsg->cmsg_len == CMSG_LEN(sizeof(int)))
309                                 val = *(int *)CMSG_DATA(cmsg);
310                         else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8)))
311                                 val = *(u8 *)CMSG_DATA(cmsg);
312                         else
313                                 return -EINVAL;
314                         if (val < 0 || val > 255)
315                                 return -EINVAL;
316                         ipc->tos = val;
317                         ipc->priority = rt_tos2priority(ipc->tos);
318                         break;
319 
320                 default:
321                         return -EINVAL;
322                 }
323         }
324         return 0;
325 }
326 
327 static void ip_ra_destroy_rcu(struct rcu_head *head)
328 {
329         struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu);
330 
331         sock_put(ra->saved_sk);
332         kfree(ra);
333 }
334 
335 int ip_ra_control(struct sock *sk, unsigned char on,
336                   void (*destructor)(struct sock *))
337 {
338         struct ip_ra_chain *ra, *new_ra;
339         struct ip_ra_chain __rcu **rap;
340         struct net *net = sock_net(sk);
341 
342         if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW)
343                 return -EINVAL;
344 
345         new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
346         if (on && !new_ra)
347                 return -ENOMEM;
348 
349         mutex_lock(&net->ipv4.ra_mutex);
350         for (rap = &net->ipv4.ra_chain;
351              (ra = rcu_dereference_protected(*rap,
352                         lockdep_is_held(&net->ipv4.ra_mutex))) != NULL;
353              rap = &ra->next) {
354                 if (ra->sk == sk) {
355                         if (on) {
356                                 mutex_unlock(&net->ipv4.ra_mutex);
357                                 kfree(new_ra);
358                                 return -EADDRINUSE;
359                         }
360                         /* dont let ip_call_ra_chain() use sk again */
361                         ra->sk = NULL;
362                         RCU_INIT_POINTER(*rap, ra->next);
363                         mutex_unlock(&net->ipv4.ra_mutex);
364 
365                         if (ra->destructor)
366                                 ra->destructor(sk);
367                         /*
368                          * Delay sock_put(sk) and kfree(ra) after one rcu grace
369                          * period. This guarantee ip_call_ra_chain() dont need
370                          * to mess with socket refcounts.
371                          */
372                         ra->saved_sk = sk;
373                         call_rcu(&ra->rcu, ip_ra_destroy_rcu);
374                         return 0;
375                 }
376         }
377         if (!new_ra) {
378                 mutex_unlock(&net->ipv4.ra_mutex);
379                 return -ENOBUFS;
380         }
381         new_ra->sk = sk;
382         new_ra->destructor = destructor;
383 
384         RCU_INIT_POINTER(new_ra->next, ra);
385         rcu_assign_pointer(*rap, new_ra);
386         sock_hold(sk);
387         mutex_unlock(&net->ipv4.ra_mutex);
388 
389         return 0;
390 }
391 
392 void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err,
393                    __be16 port, u32 info, u8 *payload)
394 {
395         struct sock_exterr_skb *serr;
396 
397         skb = skb_clone(skb, GFP_ATOMIC);
398         if (!skb)
399                 return;
400 
401         serr = SKB_EXT_ERR(skb);
402         serr->ee.ee_errno = err;
403         serr->ee.ee_origin = SO_EE_ORIGIN_ICMP;
404         serr->ee.ee_type = icmp_hdr(skb)->type;
405         serr->ee.ee_code = icmp_hdr(skb)->code;
406         serr->ee.ee_pad = 0;
407         serr->ee.ee_info = info;
408         serr->ee.ee_data = 0;
409         serr->addr_offset = (u8 *)&(((struct iphdr *)(icmp_hdr(skb) + 1))->daddr) -
410                                    skb_network_header(skb);
411         serr->port = port;
412 
413         if (skb_pull(skb, payload - skb->data)) {
414                 skb_reset_transport_header(skb);
415                 if (sock_queue_err_skb(sk, skb) == 0)
416                         return;
417         }
418         kfree_skb(skb);
419 }
420 
421 void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info)
422 {
423         struct inet_sock *inet = inet_sk(sk);
424         struct sock_exterr_skb *serr;
425         struct iphdr *iph;
426         struct sk_buff *skb;
427 
428         if (!inet->recverr)
429                 return;
430 
431         skb = alloc_skb(sizeof(struct iphdr), GFP_ATOMIC);
432         if (!skb)
433                 return;
434 
435         skb_put(skb, sizeof(struct iphdr));
436         skb_reset_network_header(skb);
437         iph = ip_hdr(skb);
438         iph->daddr = daddr;
439 
440         serr = SKB_EXT_ERR(skb);
441         serr->ee.ee_errno = err;
442         serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL;
443         serr->ee.ee_type = 0;
444         serr->ee.ee_code = 0;
445         serr->ee.ee_pad = 0;
446         serr->ee.ee_info = info;
447         serr->ee.ee_data = 0;
448         serr->addr_offset = (u8 *)&iph->daddr - skb_network_header(skb);
449         serr->port = port;
450 
451         __skb_pull(skb, skb_tail_pointer(skb) - skb->data);
452         skb_reset_transport_header(skb);
453 
454         if (sock_queue_err_skb(sk, skb))
455                 kfree_skb(skb);
456 }
457 
458 /* For some errors we have valid addr_offset even with zero payload and
459  * zero port. Also, addr_offset should be supported if port is set.
460  */
461 static inline bool ipv4_datagram_support_addr(struct sock_exterr_skb *serr)
462 {
463         return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
464                serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port;
465 }
466 
467 /* IPv4 supports cmsg on all imcp errors and some timestamps
468  *
469  * Timestamp code paths do not initialize the fields expected by cmsg:
470  * the PKTINFO fields in skb->cb[]. Fill those in here.
471  */
472 static bool ipv4_datagram_support_cmsg(const struct sock *sk,
473                                        struct sk_buff *skb,
474                                        int ee_origin)
475 {
476         struct in_pktinfo *info;
477 
478         if (ee_origin == SO_EE_ORIGIN_ICMP)
479                 return true;
480 
481         if (ee_origin == SO_EE_ORIGIN_LOCAL)
482                 return false;
483 
484         /* Support IP_PKTINFO on tstamp packets if requested, to correlate
485          * timestamp with egress dev. Not possible for packets without iif
486          * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
487          */
488         info = PKTINFO_SKB_CB(skb);
489         if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG) ||
490             !info->ipi_ifindex)
491                 return false;
492 
493         info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
494         return true;
495 }
496 
497 /*
498  *      Handle MSG_ERRQUEUE
499  */
500 int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
501 {
502         struct sock_exterr_skb *serr;
503         struct sk_buff *skb;
504         DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
505         struct {
506                 struct sock_extended_err ee;
507                 struct sockaddr_in       offender;
508         } errhdr;
509         int err;
510         int copied;
511 
512         err = -EAGAIN;
513         skb = sock_dequeue_err_skb(sk);
514         if (!skb)
515                 goto out;
516 
517         copied = skb->len;
518         if (copied > len) {
519                 msg->msg_flags |= MSG_TRUNC;
520                 copied = len;
521         }
522         err = skb_copy_datagram_msg(skb, 0, msg, copied);
523         if (unlikely(err)) {
524                 kfree_skb(skb);
525                 return err;
526         }
527         sock_recv_timestamp(msg, sk, skb);
528 
529         serr = SKB_EXT_ERR(skb);
530 
531         if (sin && ipv4_datagram_support_addr(serr)) {
532                 sin->sin_family = AF_INET;
533                 sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
534                                                    serr->addr_offset);
535                 sin->sin_port = serr->port;
536                 memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
537                 *addr_len = sizeof(*sin);
538         }
539 
540         memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
541         sin = &errhdr.offender;
542         memset(sin, 0, sizeof(*sin));
543 
544         if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
545                 sin->sin_family = AF_INET;
546                 sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
547                 if (inet_sk(sk)->cmsg_flags)
548                         ip_cmsg_recv(msg, skb);
549         }
550 
551         put_cmsg(msg, SOL_IP, IP_RECVERR, sizeof(errhdr), &errhdr);
552 
553         /* Now we could try to dump offended packet options */
554 
555         msg->msg_flags |= MSG_ERRQUEUE;
556         err = copied;
557 
558         consume_skb(skb);
559 out:
560         return err;
561 }
562 
563 static void __ip_sock_set_tos(struct sock *sk, int val)
564 {
565         if (sk->sk_type == SOCK_STREAM) {
566                 val &= ~INET_ECN_MASK;
567                 val |= inet_sk(sk)->tos & INET_ECN_MASK;
568         }
569         if (inet_sk(sk)->tos != val) {
570                 inet_sk(sk)->tos = val;
571                 sk->sk_priority = rt_tos2priority(val);
572                 sk_dst_reset(sk);
573         }
574 }
575 
/* Run __ip_sock_set_tos() on @sk with the socket lock held. */
void ip_sock_set_tos(struct sock *sk, int val)
{
        lock_sock(sk);
        __ip_sock_set_tos(sk, val);
        release_sock(sk);
}
EXPORT_SYMBOL(ip_sock_set_tos);
583 
584 void ip_sock_set_freebind(struct sock *sk)
585 {
586         lock_sock(sk);
587         inet_sk(sk)->freebind = true;
588         release_sock(sk);
589 }
590 EXPORT_SYMBOL(ip_sock_set_freebind);
591 
592 void ip_sock_set_recverr(struct sock *sk)
593 {
594         lock_sock(sk);
595         inet_sk(sk)->recverr = true;
596         release_sock(sk);
597 }
598 EXPORT_SYMBOL(ip_sock_set_recverr);
599 
600 int ip_sock_set_mtu_discover(struct sock *sk, int val)
601 {
602         if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
603                 return -EINVAL;
604         lock_sock(sk);
605         inet_sk(sk)->pmtudisc = val;
606         release_sock(sk);
607         return 0;
608 }
609 EXPORT_SYMBOL(ip_sock_set_mtu_discover);
610 
611 void ip_sock_set_pktinfo(struct sock *sk)
612 {
613         lock_sock(sk);
614         inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
615         release_sock(sk);
616 }
617 EXPORT_SYMBOL(ip_sock_set_pktinfo);
618 
619 /*
620  *      Socket option code for IP. This is the end of the line after any
621  *      TCP,UDP etc options on an IP socket.
622  */
/*
 * Tell whether setting @optname requires the RTNL lock. This is the case
 * for the multicast membership, source-filter and source block/unblock
 * options listed below; every other option returns false.
 */
static bool setsockopt_needs_rtnl(int optname)
{
        bool needed;

        switch (optname) {
        case IP_ADD_MEMBERSHIP:
        case IP_DROP_MEMBERSHIP:
        case IP_ADD_SOURCE_MEMBERSHIP:
        case IP_DROP_SOURCE_MEMBERSHIP:
        case IP_BLOCK_SOURCE:
        case IP_UNBLOCK_SOURCE:
        case IP_MSFILTER:
        case MCAST_JOIN_GROUP:
        case MCAST_LEAVE_GROUP:
        case MCAST_JOIN_SOURCE_GROUP:
        case MCAST_LEAVE_SOURCE_GROUP:
        case MCAST_BLOCK_SOURCE:
        case MCAST_UNBLOCK_SOURCE:
        case MCAST_MSFILTER:
                needed = true;
                break;
        default:
                needed = false;
                break;
        }

        return needed;
}
644 
645 static int set_mcast_msfilter(struct sock *sk, int ifindex,
646                               int numsrc, int fmode,
647                               struct sockaddr_storage *group,
648                               struct sockaddr_storage *list)
649 {
650         int msize = IP_MSFILTER_SIZE(numsrc);
651         struct ip_msfilter *msf;
652         struct sockaddr_in *psin;
653         int err, i;
654 
655         msf = kmalloc(msize, GFP_KERNEL);
656         if (!msf)
657                 return -ENOBUFS;
658 
659         psin = (struct sockaddr_in *)group;
660         if (psin->sin_family != AF_INET)
661                 goto Eaddrnotavail;
662         msf->imsf_multiaddr = psin->sin_addr.s_addr;
663         msf->imsf_interface = 0;
664         msf->imsf_fmode = fmode;
665         msf->imsf_numsrc = numsrc;
666         for (i = 0; i < numsrc; ++i) {
667                 psin = (struct sockaddr_in *)&list[i];
668 
669                 if (psin->sin_family != AF_INET)
670                         goto Eaddrnotavail;
671                 msf->imsf_slist[i] = psin->sin_addr.s_addr;
672         }
673         err = ip_mc_msfilter(sk, msf, ifindex);
674         kfree(msf);
675         return err;
676 
677 Eaddrnotavail:
678         kfree(msf);
679         return -EADDRNOTAVAIL;
680 }
681 
682 static int do_mcast_group_source(struct sock *sk, int optname,
683                                  struct group_source_req *greqs)
684 {
685         struct ip_mreq_source mreqs;
686         struct sockaddr_in *psin;
687         int omode, add, err;
688 
689         if (greqs->gsr_group.ss_family != AF_INET ||
690             greqs->gsr_source.ss_family != AF_INET)
691                 return -EADDRNOTAVAIL;
692 
693         psin = (struct sockaddr_in *)&greqs->gsr_group;
694         mreqs.imr_multiaddr = psin->sin_addr.s_addr;
695         psin = (struct sockaddr_in *)&greqs->gsr_source;
696         mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
697         mreqs.imr_interface = 0; /* use index for mc_source */
698 
699         if (optname == MCAST_BLOCK_SOURCE) {
700                 omode = MCAST_EXCLUDE;
701                 add = 1;
702         } else if (optname == MCAST_UNBLOCK_SOURCE) {
703                 omode = MCAST_EXCLUDE;
704                 add = 0;
705         } else if (optname == MCAST_JOIN_SOURCE_GROUP) {
706                 struct ip_mreqn mreq;
707 
708                 psin = (struct sockaddr_in *)&greqs->gsr_group;
709                 mreq.imr_multiaddr = psin->sin_addr;
710                 mreq.imr_address.s_addr = 0;
711                 mreq.imr_ifindex = greqs->gsr_interface;
712                 err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
713                 if (err && err != -EADDRINUSE)
714                         return err;
715                 greqs->gsr_interface = mreq.imr_ifindex;
716                 omode = MCAST_INCLUDE;
717                 add = 1;
718         } else /* MCAST_LEAVE_SOURCE_GROUP */ {
719                 omode = MCAST_INCLUDE;
720                 add = 0;
721         }
722         return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
723 }
724 
725 static int do_ip_setsockopt(struct sock *sk, int level,
726                             int optname, char __user *optval, unsigned int optlen)
727 {
728         struct inet_sock *inet = inet_sk(sk);
729         struct net *net = sock_net(sk);
730         int val = 0, err;
731         bool needs_rtnl = setsockopt_needs_rtnl(optname);
732 
733         switch (optname) {
734         case IP_PKTINFO:
735         case IP_RECVTTL:
736         case IP_RECVOPTS:
737         case IP_RECVTOS:
738         case IP_RETOPTS:
739         case IP_TOS:
740         case IP_TTL:
741         case IP_HDRINCL:
742         case IP_MTU_DISCOVER:
743         case IP_RECVERR:
744         case IP_ROUTER_ALERT:
745         case IP_FREEBIND:
746         case IP_PASSSEC:
747         case IP_TRANSPARENT:
748         case IP_MINTTL:
749         case IP_NODEFRAG:
750         case IP_BIND_ADDRESS_NO_PORT:
751         case IP_UNICAST_IF:
752         case IP_MULTICAST_TTL:
753         case IP_MULTICAST_ALL:
754         case IP_MULTICAST_LOOP:
755         case IP_RECVORIGDSTADDR:
756         case IP_CHECKSUM:
757         case IP_RECVFRAGSIZE:
758                 if (optlen >= sizeof(int)) {
759                         if (get_user(val, (int __user *) optval))
760                                 return -EFAULT;
761                 } else if (optlen >= sizeof(char)) {
762                         unsigned char ucval;
763 
764                         if (get_user(ucval, (unsigned char __user *) optval))
765                                 return -EFAULT;
766                         val = (int) ucval;
767                 }
768         }
769 
770         /* If optlen==0, it is equivalent to val == 0 */
771 
772         if (optname == IP_ROUTER_ALERT)
773                 return ip_ra_control(sk, val ? 1 : 0, NULL);
774         if (ip_mroute_opt(optname))
775                 return ip_mroute_setsockopt(sk, optname, optval, optlen);
776 
777         err = 0;
778         if (needs_rtnl)
779                 rtnl_lock();
780         lock_sock(sk);
781 
782         switch (optname) {
783         case IP_OPTIONS:
784         {
785                 struct ip_options_rcu *old, *opt = NULL;
786 
787                 if (optlen > 40)
788                         goto e_inval;
789                 err = ip_options_get_from_user(sock_net(sk), &opt,
790                                                optval, optlen);
791                 if (err)
792                         break;
793                 old = rcu_dereference_protected(inet->inet_opt,
794                                                 lockdep_sock_is_held(sk));
795                 if (inet->is_icsk) {
796                         struct inet_connection_sock *icsk = inet_csk(sk);
797 #if IS_ENABLED(CONFIG_IPV6)
798                         if (sk->sk_family == PF_INET ||
799                             (!((1 << sk->sk_state) &
800                                (TCPF_LISTEN | TCPF_CLOSE)) &&
801                              inet->inet_daddr != LOOPBACK4_IPV6)) {
802 #endif
803                                 if (old)
804                                         icsk->icsk_ext_hdr_len -= old->opt.optlen;
805                                 if (opt)
806                                         icsk->icsk_ext_hdr_len += opt->opt.optlen;
807                                 icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
808 #if IS_ENABLED(CONFIG_IPV6)
809                         }
810 #endif
811                 }
812                 rcu_assign_pointer(inet->inet_opt, opt);
813                 if (old)
814                         kfree_rcu(old, rcu);
815                 break;
816         }
817         case IP_PKTINFO:
818                 if (val)
819                         inet->cmsg_flags |= IP_CMSG_PKTINFO;
820                 else
821                         inet->cmsg_flags &= ~IP_CMSG_PKTINFO;
822                 break;
823         case IP_RECVTTL:
824                 if (val)
825                         inet->cmsg_flags |=  IP_CMSG_TTL;
826                 else
827                         inet->cmsg_flags &= ~IP_CMSG_TTL;
828                 break;
829         case IP_RECVTOS:
830                 if (val)
831                         inet->cmsg_flags |=  IP_CMSG_TOS;
832                 else
833                         inet->cmsg_flags &= ~IP_CMSG_TOS;
834                 break;
835         case IP_RECVOPTS:
836                 if (val)
837                         inet->cmsg_flags |=  IP_CMSG_RECVOPTS;
838                 else
839                         inet->cmsg_flags &= ~IP_CMSG_RECVOPTS;
840                 break;
841         case IP_RETOPTS:
842                 if (val)
843                         inet->cmsg_flags |= IP_CMSG_RETOPTS;
844                 else
845                         inet->cmsg_flags &= ~IP_CMSG_RETOPTS;
846                 break;
847         case IP_PASSSEC:
848                 if (val)
849                         inet->cmsg_flags |= IP_CMSG_PASSSEC;
850                 else
851                         inet->cmsg_flags &= ~IP_CMSG_PASSSEC;
852                 break;
853         case IP_RECVORIGDSTADDR:
854                 if (val)
855                         inet->cmsg_flags |= IP_CMSG_ORIGDSTADDR;
856                 else
857                         inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR;
858                 break;
859         case IP_CHECKSUM:
860                 if (val) {
861                         if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) {
862                                 inet_inc_convert_csum(sk);
863                                 inet->cmsg_flags |= IP_CMSG_CHECKSUM;
864                         }
865                 } else {
866                         if (inet->cmsg_flags & IP_CMSG_CHECKSUM) {
867                                 inet_dec_convert_csum(sk);
868                                 inet->cmsg_flags &= ~IP_CMSG_CHECKSUM;
869                         }
870                 }
871                 break;
872         case IP_RECVFRAGSIZE:
873                 if (sk->sk_type != SOCK_RAW && sk->sk_type != SOCK_DGRAM)
874                         goto e_inval;
875                 if (val)
876                         inet->cmsg_flags |= IP_CMSG_RECVFRAGSIZE;
877                 else
878                         inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
879                 break;
880         case IP_TOS:    /* This sets both TOS and Precedence */
881                 __ip_sock_set_tos(sk, val);
882                 break;
883         case IP_TTL:
884                 if (optlen < 1)
885                         goto e_inval;
886                 if (val != -1 && (val < 1 || val > 255))
887                         goto e_inval;
888                 inet->uc_ttl = val;
889                 break;
890         case IP_HDRINCL:
891                 if (sk->sk_type != SOCK_RAW) {
892                         err = -ENOPROTOOPT;
893                         break;
894                 }
895                 inet->hdrincl = val ? 1 : 0;
896                 break;
897         case IP_NODEFRAG:
898                 if (sk->sk_type != SOCK_RAW) {
899                         err = -ENOPROTOOPT;
900                         break;
901                 }
902                 inet->nodefrag = val ? 1 : 0;
903                 break;
904         case IP_BIND_ADDRESS_NO_PORT:
905                 inet->bind_address_no_port = val ? 1 : 0;
906                 break;
907         case IP_MTU_DISCOVER:
908                 if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
909                         goto e_inval;
910                 inet->pmtudisc = val;
911                 break;
912         case IP_RECVERR:
913                 inet->recverr = !!val;
914                 if (!val)
915                         skb_queue_purge(&sk->sk_error_queue);
916                 break;
917         case IP_MULTICAST_TTL:
918                 if (sk->sk_type == SOCK_STREAM)
919                         goto e_inval;
920                 if (optlen < 1)
921                         goto e_inval;
922                 if (val == -1)
923                         val = 1;
924                 if (val < 0 || val > 255)
925                         goto e_inval;
926                 inet->mc_ttl = val;
927                 break;
928         case IP_MULTICAST_LOOP:
929                 if (optlen < 1)
930                         goto e_inval;
931                 inet->mc_loop = !!val;
932                 break;
933         case IP_UNICAST_IF:
934         {
935                 struct net_device *dev = NULL;
936                 int ifindex;
937                 int midx;
938 
939                 if (optlen != sizeof(int))
940                         goto e_inval;
941 
942                 ifindex = (__force int)ntohl((__force __be32)val);
943                 if (ifindex == 0) {
944                         inet->uc_index = 0;
945                         err = 0;
946                         break;
947                 }
948 
949                 dev = dev_get_by_index(sock_net(sk), ifindex);
950                 err = -EADDRNOTAVAIL;
951                 if (!dev)
952                         break;
953 
954                 midx = l3mdev_master_ifindex(dev);
955                 dev_put(dev);
956 
957                 err = -EINVAL;
958                 if (sk->sk_bound_dev_if &&
959                     (!midx || midx != sk->sk_bound_dev_if))
960                         break;
961 
962                 inet->uc_index = ifindex;
963                 err = 0;
964                 break;
965         }
966         case IP_MULTICAST_IF:
967         {
968                 struct ip_mreqn mreq;
969                 struct net_device *dev = NULL;
970                 int midx;
971 
972                 if (sk->sk_type == SOCK_STREAM)
973                         goto e_inval;
974                 /*
975                  *      Check the arguments are allowable
976                  */
977 
978                 if (optlen < sizeof(struct in_addr))
979                         goto e_inval;
980 
981                 err = -EFAULT;
982                 if (optlen >= sizeof(struct ip_mreqn)) {
983                         if (copy_from_user(&mreq, optval, sizeof(mreq)))
984                                 break;
985                 } else {
986                         memset(&mreq, 0, sizeof(mreq));
987                         if (optlen >= sizeof(struct ip_mreq)) {
988                                 if (copy_from_user(&mreq, optval,
989                                                    sizeof(struct ip_mreq)))
990                                         break;
991                         } else if (optlen >= sizeof(struct in_addr)) {
992                                 if (copy_from_user(&mreq.imr_address, optval,
993                                                    sizeof(struct in_addr)))
994                                         break;
995                         }
996                 }
997 
998                 if (!mreq.imr_ifindex) {
999                         if (mreq.imr_address.s_addr == htonl(INADDR_ANY)) {
1000                                 inet->mc_index = 0;
1001                                 inet->mc_addr  = 0;
1002                                 err = 0;
1003                                 break;
1004                         }
1005                         dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
1006                         if (dev)
1007                                 mreq.imr_ifindex = dev->ifindex;
1008                 } else
1009                         dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
1010 
1011 
1012                 err = -EADDRNOTAVAIL;
1013                 if (!dev)
1014                         break;
1015 
1016                 midx = l3mdev_master_ifindex(dev);
1017 
1018                 dev_put(dev);
1019 
1020                 err = -EINVAL;
1021                 if (sk->sk_bound_dev_if &&
1022                     mreq.imr_ifindex != sk->sk_bound_dev_if &&
1023                     (!midx || midx != sk->sk_bound_dev_if))
1024                         break;
1025 
1026                 inet->mc_index = mreq.imr_ifindex;
1027                 inet->mc_addr  = mreq.imr_address.s_addr;
1028                 err = 0;
1029                 break;
1030         }
1031 
1032         case IP_ADD_MEMBERSHIP:
1033         case IP_DROP_MEMBERSHIP:
1034         {
1035                 struct ip_mreqn mreq;
1036 
1037                 err = -EPROTO;
1038                 if (inet_sk(sk)->is_icsk)
1039                         break;
1040 
1041                 if (optlen < sizeof(struct ip_mreq))
1042                         goto e_inval;
1043                 err = -EFAULT;
1044                 if (optlen >= sizeof(struct ip_mreqn)) {
1045                         if (copy_from_user(&mreq, optval, sizeof(mreq)))
1046                                 break;
1047                 } else {
1048                         memset(&mreq, 0, sizeof(mreq));
1049                         if (copy_from_user(&mreq, optval, sizeof(struct ip_mreq)))
1050                                 break;
1051                 }
1052 
1053                 if (optname == IP_ADD_MEMBERSHIP)
1054                         err = ip_mc_join_group(sk, &mreq);
1055                 else
1056                         err = ip_mc_leave_group(sk, &mreq);
1057                 break;
1058         }
1059         case IP_MSFILTER:
1060         {
1061                 struct ip_msfilter *msf;
1062 
1063                 if (optlen < IP_MSFILTER_SIZE(0))
1064                         goto e_inval;
1065                 if (optlen > sysctl_optmem_max) {
1066                         err = -ENOBUFS;
1067                         break;
1068                 }
1069                 msf = memdup_user(optval, optlen);
1070                 if (IS_ERR(msf)) {
1071                         err = PTR_ERR(msf);
1072                         break;
1073                 }
1074                 /* numsrc >= (1G-4) overflow in 32 bits */
1075                 if (msf->imsf_numsrc >= 0x3ffffffcU ||
1076                     msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1077                         kfree(msf);
1078                         err = -ENOBUFS;
1079                         break;
1080                 }
1081                 if (IP_MSFILTER_SIZE(msf->imsf_numsrc) > optlen) {
1082                         kfree(msf);
1083                         err = -EINVAL;
1084                         break;
1085                 }
1086                 err = ip_mc_msfilter(sk, msf, 0);
1087                 kfree(msf);
1088                 break;
1089         }
1090         case IP_BLOCK_SOURCE:
1091         case IP_UNBLOCK_SOURCE:
1092         case IP_ADD_SOURCE_MEMBERSHIP:
1093         case IP_DROP_SOURCE_MEMBERSHIP:
1094         {
1095                 struct ip_mreq_source mreqs;
1096                 int omode, add;
1097 
1098                 if (optlen != sizeof(struct ip_mreq_source))
1099                         goto e_inval;
1100                 if (copy_from_user(&mreqs, optval, sizeof(mreqs))) {
1101                         err = -EFAULT;
1102                         break;
1103                 }
1104                 if (optname == IP_BLOCK_SOURCE) {
1105                         omode = MCAST_EXCLUDE;
1106                         add = 1;
1107                 } else if (optname == IP_UNBLOCK_SOURCE) {
1108                         omode = MCAST_EXCLUDE;
1109                         add = 0;
1110                 } else if (optname == IP_ADD_SOURCE_MEMBERSHIP) {
1111                         struct ip_mreqn mreq;
1112 
1113                         mreq.imr_multiaddr.s_addr = mreqs.imr_multiaddr;
1114                         mreq.imr_address.s_addr = mreqs.imr_interface;
1115                         mreq.imr_ifindex = 0;
1116                         err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
1117                         if (err && err != -EADDRINUSE)
1118                                 break;
1119                         omode = MCAST_INCLUDE;
1120                         add = 1;
1121                 } else /* IP_DROP_SOURCE_MEMBERSHIP */ {
1122                         omode = MCAST_INCLUDE;
1123                         add = 0;
1124                 }
1125                 err = ip_mc_source(add, omode, sk, &mreqs, 0);
1126                 break;
1127         }
1128         case MCAST_JOIN_GROUP:
1129         case MCAST_LEAVE_GROUP:
1130         {
1131                 struct group_req greq;
1132                 struct sockaddr_in *psin;
1133                 struct ip_mreqn mreq;
1134 
1135                 if (optlen < sizeof(struct group_req))
1136                         goto e_inval;
1137                 err = -EFAULT;
1138                 if (copy_from_user(&greq, optval, sizeof(greq)))
1139                         break;
1140                 psin = (struct sockaddr_in *)&greq.gr_group;
1141                 if (psin->sin_family != AF_INET)
1142                         goto e_inval;
1143                 memset(&mreq, 0, sizeof(mreq));
1144                 mreq.imr_multiaddr = psin->sin_addr;
1145                 mreq.imr_ifindex = greq.gr_interface;
1146 
1147                 if (optname == MCAST_JOIN_GROUP)
1148                         err = ip_mc_join_group(sk, &mreq);
1149                 else
1150                         err = ip_mc_leave_group(sk, &mreq);
1151                 break;
1152         }
1153         case MCAST_JOIN_SOURCE_GROUP:
1154         case MCAST_LEAVE_SOURCE_GROUP:
1155         case MCAST_BLOCK_SOURCE:
1156         case MCAST_UNBLOCK_SOURCE:
1157         {
1158                 struct group_source_req greqs;
1159 
1160                 if (optlen != sizeof(struct group_source_req))
1161                         goto e_inval;
1162                 if (copy_from_user(&greqs, optval, sizeof(greqs))) {
1163                         err = -EFAULT;
1164                         break;
1165                 }
1166                 err = do_mcast_group_source(sk, optname, &greqs);
1167                 break;
1168         }
1169         case MCAST_MSFILTER:
1170         {
1171                 struct group_filter *gsf = NULL;
1172 
1173                 if (optlen < GROUP_FILTER_SIZE(0))
1174                         goto e_inval;
1175                 if (optlen > sysctl_optmem_max) {
1176                         err = -ENOBUFS;
1177                         break;
1178                 }
1179                 gsf = memdup_user(optval, optlen);
1180                 if (IS_ERR(gsf)) {
1181                         err = PTR_ERR(gsf);
1182                         break;
1183                 }
1184                 /* numsrc >= (4G-140)/128 overflow in 32 bits */
1185                 if (gsf->gf_numsrc >= 0x1ffffff ||
1186                     gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
1187                         err = -ENOBUFS;
1188                         goto mc_msf_out;
1189                 }
1190                 if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen) {
1191                         err = -EINVAL;
1192                         goto mc_msf_out;
1193                 }
1194                 err = set_mcast_msfilter(sk, gsf->gf_interface,
1195                                          gsf->gf_numsrc, gsf->gf_fmode,
1196                                          &gsf->gf_group, gsf->gf_slist);
1197 mc_msf_out:
1198                 kfree(gsf);
1199                 break;
1200         }
1201         case IP_MULTICAST_ALL:
1202                 if (optlen < 1)
1203                         goto e_inval;
1204                 if (val != 0 && val != 1)
1205                         goto e_inval;
1206                 inet->mc_all = val;
1207                 break;
1208 
1209         case IP_FREEBIND:
1210                 if (optlen < 1)
1211                         goto e_inval;
1212                 inet->freebind = !!val;
1213                 break;
1214 
1215         case IP_IPSEC_POLICY:
1216         case IP_XFRM_POLICY:
1217                 err = -EPERM;
1218                 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1219                         break;
1220                 err = xfrm_user_policy(sk, optname, optval, optlen);
1221                 break;
1222 
1223         case IP_TRANSPARENT:
1224                 if (!!val && !ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
1225                     !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1226                         err = -EPERM;
1227                         break;
1228                 }
1229                 if (optlen < 1)
1230                         goto e_inval;
1231                 inet->transparent = !!val;
1232                 break;
1233 
1234         case IP_MINTTL:
1235                 if (optlen < 1)
1236                         goto e_inval;
1237                 if (val < 0 || val > 255)
1238                         goto e_inval;
1239                 inet->min_ttl = val;
1240                 break;
1241 
1242         default:
1243                 err = -ENOPROTOOPT;
1244                 break;
1245         }
1246         release_sock(sk);
1247         if (needs_rtnl)
1248                 rtnl_unlock();
1249         return err;
1250 
1251 e_inval:
1252         release_sock(sk);
1253         if (needs_rtnl)
1254                 rtnl_unlock();
1255         return -EINVAL;
1256 }
1257 
1258 /**
1259  * ipv4_pktinfo_prepare - transfer some info from rtable to skb
1260  * @sk: socket; the info is recorded only when its cmsg_flags has
      *      IP_CMSG_PKTINFO set or ipv6_sk_rxinfo(sk) is true
1261  * @skb: buffer whose control block (skb->cb) receives the info
1262  *
1263  * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
1264  * destination in skb->cb[] before dst drop.
1265  * This way, receiver doesn't make cache line misses to read rtable.
      *
      * When the info is not wanted, or the skb has no rtable, the stored
      * interface index and specific destination are both set to zero.
      * The dst is dropped from the skb on every path.
1266  */
1267 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
1268 {
1269         struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
1270         bool prepare = (inet_sk(sk)->cmsg_flags & IP_CMSG_PKTINFO) ||
1271                        ipv6_sk_rxinfo(sk);
1272 
1273         if (prepare && skb_rtable(skb)) {
1274                 /* skb->cb is overloaded: prior to this point it is IP{6}CB
1275                  * which has interface index (iif) as the first member of the
1276                  * underlying inet{6}_skb_parm struct. This code then overlays
1277                  * PKTINFO_SKB_CB and in_pktinfo also has iif as the first
1278                  * element so the iif is picked up from the prior IPCB. If iif
1279                  * is the loopback interface, then return the sending interface
1280                  * (e.g., process binds socket to eth0 for Tx which is
1281                  * redirected to loopback in the rtable/dst).
1282                  */
1283                 struct rtable *rt = skb_rtable(skb);
                     /* NOTE(review): the IPCB flags are read here, before any
                      * pktinfo field is written; presumably this ordering matters
                      * because both views share skb->cb - confirm the layouts.
                      */
1284                 bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
1285 
                     /* A loopback iif is replaced by inet_iif(skb); otherwise,
                      * when ipv4_l3mdev_skb() flagged the packet and the route
                      * has a non-zero rt_iif, that rt_iif is reported instead.
                      * In all remaining cases the inherited iif is kept.
                      */
1286                 if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
1287                         pktinfo->ipi_ifindex = inet_iif(skb);
1288                 else if (l3slave && rt && rt->rt_iif)
1289                         pktinfo->ipi_ifindex = rt->rt_iif;
1290 
1291                 pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
1292         } else {
                     /* Not requested, or no rtable attached: store zeroes. */
1293                 pktinfo->ipi_ifindex = 0;
1294                 pktinfo->ipi_spec_dst.s_addr = 0;
1295         }
1296         skb_dst_drop(skb);
1297 }
1298 
     /*
      * ip_setsockopt - SOL_IP setsockopt entry point.
      *
      * A level other than SOL_IP is rejected with -ENOPROTOOPT.  Otherwise the
      * request is handed to do_ip_setsockopt() and its result is returned,
      * subject to the two config-dependent adjustments below.
      *
      * With CONFIG_BPFILTER_UMH, option names in the range
      * [BPFILTER_IPT_SO_SET_REPLACE, BPFILTER_IPT_SET_MAX) take their result
      * from bpfilter_ip_set_sockopt(); do_ip_setsockopt() has already been
      * called for them and its result is overwritten.
      *
      * With CONFIG_NETFILTER, an -ENOPROTOOPT result is retried through
      * nf_setsockopt() unless the option is IP_HDRINCL, IP_IPSEC_POLICY,
      * IP_XFRM_POLICY or one that ip_mroute_opt() recognises.
      */
1299 int ip_setsockopt(struct sock *sk, int level,
1300                 int optname, char __user *optval, unsigned int optlen)
1301 {
1302         int err;
1303 
1304         if (level != SOL_IP)
1305                 return -ENOPROTOOPT;
1306 
1307         err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1308 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1309         if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
1310             optname < BPFILTER_IPT_SET_MAX)
1311                 err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
1312 #endif
1313 #ifdef CONFIG_NETFILTER
1314         /* we need to exclude all possible ENOPROTOOPTs except default case */
             /* NOTE(review): do_ip_setsockopt() also returns -ENOPROTOOPT for
              * IP_NODEFRAG on a non-raw socket, and IP_NODEFRAG is not in the
              * exclusion list below, so that case is retried through
              * nf_setsockopt() as well - confirm this is intended.
              */
1315         if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1316                         optname != IP_IPSEC_POLICY &&
1317                         optname != IP_XFRM_POLICY &&
1318                         !ip_mroute_opt(optname))
1319                 err = nf_setsockopt(sk, PF_INET, optname, optval, optlen);
1320 #endif
1321         return err;
1322 }
1323 EXPORT_SYMBOL(ip_setsockopt);
1324 
1325 #ifdef CONFIG_COMPAT
     /*
      * compat_ip_setsockopt - SOL_IP setsockopt entry point for compat callers.
      *
      * A level other than SOL_IP is rejected with -ENOPROTOOPT.
      *
      * The MCAST_* options whose argument is described by a compat_* structure
      * are handled directly here: the user data is converted to the native
      * structure (or copied into an offset buffer for MCAST_MSFILTER) and the
      * operation runs with rtnl_lock() and lock_sock() held.  These cases
      * return from inside the switch.
      *
      * Every other option name falls out of the switch and is passed to
      * do_ip_setsockopt().  With CONFIG_NETFILTER, an -ENOPROTOOPT result is
      * retried through compat_nf_setsockopt() unless the option is IP_HDRINCL,
      * IP_IPSEC_POLICY, IP_XFRM_POLICY or one that ip_mroute_opt() recognises.
      * Unlike ip_setsockopt(), no bpfilter hook is applied in this function.
      */
1326 int compat_ip_setsockopt(struct sock *sk, int level, int optname,
1327                          char __user *optval, unsigned int optlen)
1328 {
1329         int err;
1330 
1331         if (level != SOL_IP)
1332                 return -ENOPROTOOPT;
1333 
1334         switch (optname) {
1335         case MCAST_JOIN_GROUP:
1336         case MCAST_LEAVE_GROUP:
1337         {
1338                 struct compat_group_req __user *gr32 = (void __user *)optval;
1339                 struct group_req greq;
1340                 struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
1341                 struct ip_mreqn mreq;
1342 
1343                 if (optlen < sizeof(struct compat_group_req))
1344                         return -EINVAL;
1345 
                     /* Fill the native group_req member by member from the
                      * compat layout in user memory.
                      */
1346                 if (get_user(greq.gr_interface, &gr32->gr_interface) ||
1347                     copy_from_user(&greq.gr_group, &gr32->gr_group,
1348                                 sizeof(greq.gr_group)))
1349                         return -EFAULT;
1350 
1351                 if (psin->sin_family != AF_INET)
1352                         return -EINVAL;
1353 
1354                 memset(&mreq, 0, sizeof(mreq));
1355                 mreq.imr_multiaddr = psin->sin_addr;
1356                 mreq.imr_ifindex = greq.gr_interface;
1357 
                     /* RTNL is taken before the socket lock and released after it. */
1358                 rtnl_lock();
1359                 lock_sock(sk);
1360                 if (optname == MCAST_JOIN_GROUP)
1361                         err = ip_mc_join_group(sk, &mreq);
1362                 else
1363                         err = ip_mc_leave_group(sk, &mreq);
1364                 release_sock(sk);
1365                 rtnl_unlock();
1366                 return err;
1367         }
1368         case MCAST_JOIN_SOURCE_GROUP:
1369         case MCAST_LEAVE_SOURCE_GROUP:
1370         case MCAST_BLOCK_SOURCE:
1371         case MCAST_UNBLOCK_SOURCE:
1372         {
1373                 struct compat_group_source_req __user *gsr32 = (void __user *)optval;
1374                 struct group_source_req greqs;
1375 
                     /* Exact size match is required for these four options. */
1376                 if (optlen != sizeof(struct compat_group_source_req))
1377                         return -EINVAL;
1378 
1379                 if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
1380                     copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
1381                                 sizeof(greqs.gsr_group)) ||
1382                     copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
1383                                 sizeof(greqs.gsr_source)))
1384                         return -EFAULT;
1385 
1386                 rtnl_lock();
1387                 lock_sock(sk);
1388                 err = do_mcast_group_source(sk, optname, &greqs);
1389                 release_sock(sk);
1390                 rtnl_unlock();
1391                 return err;
1392         }
1393         case MCAST_MSFILTER:
1394         {
                     /* size0: bytes of the compat structure that precede the
                      * source list.
                      */
1395                 const int size0 = offsetof(struct compat_group_filter, gf_slist);
1396                 struct compat_group_filter *gf32;
1397                 unsigned int n;
1398                 void *p;
1399 
1400                 if (optlen < size0)
1401                         return -EINVAL;
                     /* The allocation below is optlen + 4 bytes, so the limit is
                      * checked with those 4 bytes already accounted for.
                      */
1402                 if (optlen > sysctl_optmem_max - 4)
1403                         return -ENOBUFS;
1404 
1405                 p = kmalloc(optlen + 4, GFP_KERNEL);
1406                 if (!p)
1407                         return -ENOMEM;
1408                 gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
1409                 if (copy_from_user(gf32, optval, optlen)) {
1410                         err = -EFAULT;
1411                         goto mc_msf_out;
1412                 }
1413 
1414                 n = gf32->gf_numsrc;
1415                 /* numsrc >= (4G-140)/128 overflow in 32 bits */
1416                 if (n >= 0x1ffffff) {
1417                         err = -ENOBUFS;
1418                         goto mc_msf_out;
1419                 }
                     /* The n sources claimed in the header must fit inside the
                      * optlen bytes that were actually copied in.
                      */
1420                 if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
1421                         err = -EINVAL;
1422                         goto mc_msf_out;
1423                 }
1424 
1425                 rtnl_lock();
1426                 lock_sock(sk);
1427                 /* reject a source count above sysctl_igmp_max_msf of sock_net(sk) */
1428                 if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
1429                         err = -ENOBUFS;
1430                 else
1431                         err = set_mcast_msfilter(sk, gf32->gf_interface,
1432                                                  n, gf32->gf_fmode,
1433                                                  &gf32->gf_group, gf32->gf_slist);
1434                 release_sock(sk);
1435                 rtnl_unlock();
                     /* Common exit for this case: free the buffer (p, not gf32). */
1436 mc_msf_out:
1437                 kfree(p);
1438                 return err;
1439         }
1440         }
1441 
             /* Not one of the compat-specific options: use the common handler. */
1442         err = do_ip_setsockopt(sk, level, optname, optval, optlen);
1443 #ifdef CONFIG_NETFILTER
1444         /* we need to exclude all possible ENOPROTOOPTs except default case */
1445         if (err == -ENOPROTOOPT && optname != IP_HDRINCL &&
1446                         optname != IP_IPSEC_POLICY &&
1447                         optname != IP_XFRM_POLICY &&
1448                         !ip_mroute_opt(optname))
1449                 err = compat_nf_setsockopt(sk, PF_INET, optname, optval,
1450                                            optlen);
1451 #endif
1452         return err;
1453 }
1454 EXPORT_SYMBOL(compat_ip_setsockopt);
1455 #endif
1456 
1457 /*
1458  *      Get the options. Note for future reference. The GET of IP options gets
1459  *      the _received_ ones. The set sets the _sent_ ones.
1460  */
1461 
     /*
      * getsockopt_needs_rtnl - tell whether @optname is one of the options that
      * do_ip_getsockopt() handles with rtnl_lock() held.
      *
      * Returns true for IP_MSFILTER and MCAST_MSFILTER, false for every other
      * option name.
      */
1462 static bool getsockopt_needs_rtnl(int optname)
1463 {
1464         switch (optname) {
1465         case IP_MSFILTER:
1466         case MCAST_MSFILTER:
1467                 return true;
1468         }
1469         return false;
1470 }
1471 
1472 static int do_ip_getsockopt(struct sock *sk, int level, int optname,
1473                             char __user *optval, int __user *optlen, unsigned int flags)
1474 {
1475         struct inet_sock *inet = inet_sk(sk);
1476         bool needs_rtnl = getsockopt_needs_rtnl(optname);
1477         int val, err = 0;
1478         int len;
1479 
1480         if (level != SOL_IP)
1481                 return -EOPNOTSUPP;
1482 
1483         if (ip_mroute_opt(optname))
1484                 return ip_mroute_getsockopt(sk, optname, optval, optlen);
1485 
1486         if (get_user(len, optlen))
1487                 return -EFAULT;
1488         if (len < 0)
1489                 return -EINVAL;
1490 
1491         if (needs_rtnl)
1492                 rtnl_lock();
1493         lock_sock(sk);
1494 
1495         switch (optname) {
1496         case IP_OPTIONS:
1497         {
1498                 unsigned char optbuf[sizeof(struct ip_options)+40];
1499                 struct ip_options *opt = (struct ip_options *)optbuf;
1500                 struct ip_options_rcu *inet_opt;
1501 
1502                 inet_opt = rcu_dereference_protected(inet->inet_opt,
1503                                                      lockdep_sock_is_held(sk));
1504                 opt->optlen = 0;
1505                 if (inet_opt)
1506                         memcpy(optbuf, &inet_opt->opt,
1507                                sizeof(struct ip_options) +
1508                                inet_opt->opt.optlen);
1509                 release_sock(sk);
1510 
1511                 if (opt->optlen == 0)
1512                         return put_user(0, optlen);
1513 
1514                 ip_options_undo(opt);
1515 
1516                 len = min_t(unsigned int, len, opt->optlen);
1517                 if (put_user(len, optlen))
1518                         return -EFAULT;
1519                 if (copy_to_user(optval, opt->__data, len))
1520                         return -EFAULT;
1521                 return 0;
1522         }
1523         case IP_PKTINFO:
1524                 val = (inet->cmsg_flags & IP_CMSG_PKTINFO) != 0;
1525                 break;
1526         case IP_RECVTTL:
1527                 val = (inet->cmsg_flags & IP_CMSG_TTL) != 0;
1528                 break;
1529         case IP_RECVTOS:
1530                 val = (inet->cmsg_flags & IP_CMSG_TOS) != 0;
1531                 break;
1532         case IP_RECVOPTS:
1533                 val = (inet->cmsg_flags & IP_CMSG_RECVOPTS) != 0;
1534                 break;
1535         case IP_RETOPTS:
1536                 val = (inet->cmsg_flags & IP_CMSG_RETOPTS) != 0;
1537                 break;
1538         case IP_PASSSEC:
1539                 val = (inet->cmsg_flags & IP_CMSG_PASSSEC) != 0;
1540                 break;
1541         case IP_RECVORIGDSTADDR:
1542                 val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0;
1543                 break;
1544         case IP_CHECKSUM:
1545                 val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0;
1546                 break;
1547         case IP_RECVFRAGSIZE:
1548                 val = (inet->cmsg_flags & IP_CMSG_RECVFRAGSIZE) != 0;
1549                 break;
1550         case IP_TOS:
1551                 val = inet->tos;
1552                 break;
1553         case IP_TTL:
1554         {
1555                 struct net *net = sock_net(sk);
1556                 val = (inet->uc_ttl == -1 ?
1557                        net->ipv4.sysctl_ip_default_ttl :
1558                        inet->uc_ttl);
1559                 break;
1560         }
1561         case IP_HDRINCL:
1562                 val = inet->hdrincl;
1563                 break;
1564         case IP_NODEFRAG:
1565                 val = inet->nodefrag;
1566                 break;
1567         case IP_BIND_ADDRESS_NO_PORT:
1568                 val = inet->bind_address_no_port;
1569                 break;
1570         case IP_MTU_DISCOVER:
1571                 val = inet->pmtudisc;
1572                 break;
1573         case IP_MTU:
1574         {
1575                 struct dst_entry *dst;
1576                 val = 0;
1577                 dst = sk_dst_get(sk);
1578                 if (dst) {
1579                         val = dst_mtu(dst);
1580                         dst_release(dst);
1581                 }
1582                 if (!val) {
1583                         release_sock(sk);
1584                         return -ENOTCONN;
1585                 }
1586                 break;
1587         }
1588         case IP_RECVERR:
1589                 val = inet->recverr;
1590                 break;
1591         case IP_MULTICAST_TTL:
1592                 val = inet->mc_ttl;
1593                 break;
1594         case IP_MULTICAST_LOOP:
1595                 val = inet->mc_loop;
1596                 break;
1597         case IP_UNICAST_IF:
1598                 val = (__force int)htonl((__u32) inet->uc_index);
1599                 break;
1600         case IP_MULTICAST_IF:
1601         {
1602                 struct in_addr addr;
1603                 len = min_t(unsigned int, len, sizeof(struct in_addr));
1604                 addr.s_addr = inet->mc_addr;
1605                 release_sock(sk);
1606 
1607                 if (put_user(len, optlen))
1608                         return -EFAULT;
1609                 if (copy_to_user(optval, &addr, len))
1610                         return -EFAULT;
1611                 return 0;
1612         }
1613         case IP_MSFILTER:
1614         {
1615                 struct ip_msfilter msf;
1616 
1617                 if (len < IP_MSFILTER_SIZE(0)) {
1618                         err = -EINVAL;
1619                         goto out;
1620                 }
1621                 if (copy_from_user(&msf, optval, IP_MSFILTER_SIZE(0))) {
1622                         err = -EFAULT;
1623                         goto out;
1624                 }
1625                 err = ip_mc_msfget(sk, &msf,
1626                                    (struct ip_msfilter __user *)optval, optlen);
1627                 goto out;
1628         }
1629         case MCAST_MSFILTER:
1630         {
1631                 struct group_filter __user *p = (void __user *)optval;
1632                 struct group_filter gsf;
1633                 const int size0 = offsetof(struct group_filter, gf_slist);
1634                 int num;
1635 
1636                 if (len < size0) {
1637                         err = -EINVAL;
1638                         goto out;
1639                 }
1640                 if (copy_from_user(&gsf, p, size0)) {
1641                         err = -EFAULT;
1642                         goto out;
1643                 }
1644                 num = gsf.gf_numsrc;
1645                 err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
1646                 if (err)
1647                         goto out;
1648                 if (gsf.gf_numsrc < num)
1649                         num = gsf.gf_numsrc;
1650                 if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
1651                     copy_to_user(p, &gsf, size0))
1652                         err = -EFAULT;
1653                 goto out;
1654         }
1655         case IP_MULTICAST_ALL:
1656                 val = inet->mc_all;
1657                 break;
1658         case IP_PKTOPTIONS:
1659         {
1660                 struct msghdr msg;
1661 
1662                 release_sock(sk);
1663 
1664                 if (sk->sk_type != SOCK_STREAM)
1665                         return -ENOPROTOOPT;
1666 
1667                 msg.msg_control_is_user = true;
1668                 msg.msg_control_user = optval;
1669                 msg.msg_controllen = len;
1670                 msg.msg_flags = flags;
1671 
1672                 if (inet->cmsg_flags & IP_CMSG_PKTINFO) {
1673                         struct in_pktinfo info;
1674 
1675                         info.ipi_addr.s_addr = inet->inet_rcv_saddr;
1676                         info.ipi_spec_dst.s_addr = inet->inet_rcv_saddr;
1677                         info.ipi_ifindex = inet->mc_index;
1678                         put_cmsg(&msg, SOL_IP, IP_PKTINFO, sizeof(info), &info);
1679                 }
1680                 if (inet->cmsg_flags & IP_CMSG_TTL) {
1681                         int hlim = inet->mc_ttl;
1682                         put_cmsg(&msg, SOL_IP, IP_TTL, sizeof(hlim), &hlim);
1683                 }
1684                 if (inet->cmsg_flags & IP_CMSG_TOS) {
1685                         int tos = inet->rcv_tos;
1686                         put_cmsg(&msg, SOL_IP, IP_TOS, sizeof(tos), &tos);
1687                 }
1688                 len -= msg.msg_controllen;
1689                 return put_user(len, optlen);
1690         }
1691         case IP_FREEBIND:
1692                 val = inet->freebind;
1693                 break;
1694         case IP_TRANSPARENT:
1695                 val = inet->transparent;
1696                 break;
1697         case IP_MINTTL:
1698                 val = inet->min_ttl;
1699                 break;
1700         default:
1701                 release_sock(sk);
1702                 return -ENOPROTOOPT;
1703         }
1704         release_sock(sk);
1705 
1706         if (len < sizeof(int) && len > 0 && val >= 0 && val <= 255) {
1707                 unsigned char ucval = (unsigned char)val;
1708                 len = 1;
1709                 if (put_user(len, optlen))
1710                         return -EFAULT;
1711                 if (copy_to_user(optval, &ucval, 1))
1712                         return -EFAULT;
1713         } else {
1714                 len = min_t(unsigned int, sizeof(int), len);
1715                 if (put_user(len, optlen))
1716                         return -EFAULT;
1717                 if (copy_to_user(optval, &val, len))
1718                         return -EFAULT;
1719         }
1720         return 0;
1721 
1722 out:
1723         release_sock(sk);
1724         if (needs_rtnl)
1725                 rtnl_unlock();
1726         return err;
1727 }
1728 
1729 int ip_getsockopt(struct sock *sk, int level,
1730                   int optname, char __user *optval, int __user *optlen)
1731 {
1732         int err;
1733 
1734         err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
1735 #if IS_ENABLED(CONFIG_BPFILTER_UMH)
1736         if (optname >= BPFILTER_IPT_SO_GET_INFO &&
1737             optname < BPFILTER_IPT_GET_MAX)
1738                 err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
1739 #endif
1740 #ifdef CONFIG_NETFILTER
1741         /* we need to exclude all possible ENOPROTOOPTs except default case */
1742         if (err == -ENOPROTOOPT && optname != IP_PKTOPTIONS &&
1743                         !ip_mroute_opt(optname)) {
1744                 int len;
1745 
1746                 if (get_user(len, optlen))
1747                         return -EFAULT;
1748 
1749                 err = nf_getsockopt(sk, PF_INET, optname, optval, &len);
1750                 if (err >= 0)
1751                         err = put_user(len, optlen);
1752                 return err;
1753         }
1754 #endif
1755         return err;
1756 }
1757 EXPORT_SYMBOL(ip_getsockopt);
1758 
#ifdef CONFIG_COMPAT
/*
 * MCAST_MSFILTER handling for compat_ip_getsockopt().
 *
 * Reads the fixed-size head of the caller's struct compat_group_filter,
 * copies its fields into a native struct group_filter and calls
 * ip_mc_gsfget() with rtnl and the socket lock held, passing the caller's
 * gf_slist as the destination for the source list.  On success the
 * resulting option length, filter mode and source count are stored back
 * into the caller's buffers.
 *
 * Returns 0 on success, -EOPNOTSUPP if @level is not SOL_IP, -EINVAL if the
 * supplied length is shorter than the fixed head, -EFAULT on a failed user
 * access, or the error reported by ip_mc_gsfget().
 */
static int compat_ip_get_mcast_msfilter(struct sock *sk, int level,
					char __user *optval,
					int __user *optlen)
{
	struct compat_group_filter __user *ufilter = (void __user *)optval;
	const int hdr_len = offsetof(struct compat_group_filter, gf_slist);
	struct compat_group_filter cgf;
	struct group_filter gf;
	int user_len;
	int wanted;
	int ret;

	if (level != SOL_IP)
		return -EOPNOTSUPP;

	if (get_user(user_len, optlen))
		return -EFAULT;

	if (user_len < hdr_len)
		return -EINVAL;

	if (copy_from_user(&cgf, ufilter, hdr_len))
		return -EFAULT;

	gf.gf_interface = cgf.gf_interface;
	gf.gf_fmode = cgf.gf_fmode;
	gf.gf_group = cgf.gf_group;
	gf.gf_numsrc = cgf.gf_numsrc;
	/* Remember how many sources the caller asked for. */
	wanted = gf.gf_numsrc;

	rtnl_lock();
	lock_sock(sk);
	ret = ip_mc_gsfget(sk, &gf, ufilter->gf_slist);
	release_sock(sk);
	rtnl_unlock();
	if (ret)
		return ret;

	/* Report the smaller of the requested and the returned count. */
	if (gf.gf_numsrc < wanted)
		wanted = gf.gf_numsrc;

	/* Shrink the native size by the native/compat struct size difference. */
	user_len = GROUP_FILTER_SIZE(wanted) - (sizeof(gf) - sizeof(cgf));
	if (put_user(user_len, optlen) ||
	    put_user(gf.gf_fmode, &ufilter->gf_fmode) ||
	    put_user(gf.gf_numsrc, &ufilter->gf_numsrc))
		return -EFAULT;

	return 0;
}

/*
 * compat_ip_getsockopt - getsockopt() entry point for IP-level options
 * issued through the compat path.
 *
 * MCAST_MSFILTER is handled separately by compat_ip_get_mcast_msfilter().
 * Every other option goes to do_ip_getsockopt() with MSG_CMSG_COMPAT set.
 * When CONFIG_BPFILTER_UMH is enabled and @optname lies in
 * [BPFILTER_IPT_SO_GET_INFO, BPFILTER_IPT_GET_MAX), the result is replaced
 * by whatever bpfilter_ip_get_sockopt() returns.
 *
 * When CONFIG_NETFILTER is enabled and the result so far is -ENOPROTOOPT
 * for an option that is neither IP_PKTOPTIONS nor one accepted by
 * ip_mroute_opt(), the request is passed on to compat_nf_getsockopt() and,
 * if that call does not fail, the length it reports is written back to
 * @optlen.
 */
int compat_ip_getsockopt(struct sock *sk, int level, int optname,
			 char __user *optval, int __user *optlen)
{
#ifdef CONFIG_NETFILTER
	int nf_len;
#endif
	int ret;

	if (optname == MCAST_MSFILTER)
		return compat_ip_get_mcast_msfilter(sk, level, optval, optlen);

	ret = do_ip_getsockopt(sk, level, optname, optval, optlen,
			       MSG_CMSG_COMPAT);

#if IS_ENABLED(CONFIG_BPFILTER_UMH)
	if (optname >= BPFILTER_IPT_SO_GET_INFO &&
	    optname < BPFILTER_IPT_GET_MAX)
		ret = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
#endif

#ifdef CONFIG_NETFILTER
	/*
	 * Only the "unknown option" flavour of -ENOPROTOOPT may fall through
	 * to netfilter; every other source of that error code is excluded.
	 */
	if (ret != -ENOPROTOOPT || optname == IP_PKTOPTIONS ||
	    ip_mroute_opt(optname))
		return ret;

	if (get_user(nf_len, optlen))
		return -EFAULT;

	ret = compat_nf_getsockopt(sk, PF_INET, optname, optval, &nf_len);
	if (ret < 0)
		return ret;

	return put_user(nf_len, optlen);
#else
	return ret;
#endif
}
EXPORT_SYMBOL(compat_ip_getsockopt);
#endif
1834 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp