~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_tunnel.c

Version: ~ [ linux-5.16-rc3 ] ~ [ linux-5.15.5 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.82 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.162 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.218 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.256 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.291 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.293 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2013 Nicira, Inc.
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of version 2 of the GNU General Public
  6  * License as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope that it will be useful, but
  9  * WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11  * General Public License for more details.
 12  *
 13  * You should have received a copy of the GNU General Public License
 14  * along with this program; if not, write to the Free Software
 15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 16  * 02110-1301, USA
 17  */
 18 
 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20 
 21 #include <linux/capability.h>
 22 #include <linux/module.h>
 23 #include <linux/types.h>
 24 #include <linux/kernel.h>
 25 #include <linux/slab.h>
 26 #include <linux/uaccess.h>
 27 #include <linux/skbuff.h>
 28 #include <linux/netdevice.h>
 29 #include <linux/in.h>
 30 #include <linux/tcp.h>
 31 #include <linux/udp.h>
 32 #include <linux/if_arp.h>
 33 #include <linux/mroute.h>
 34 #include <linux/init.h>
 35 #include <linux/in6.h>
 36 #include <linux/inetdevice.h>
 37 #include <linux/igmp.h>
 38 #include <linux/netfilter_ipv4.h>
 39 #include <linux/etherdevice.h>
 40 #include <linux/if_ether.h>
 41 #include <linux/if_vlan.h>
 42 #include <linux/rculist.h>
 43 #include <linux/err.h>
 44 
 45 #include <net/sock.h>
 46 #include <net/ip.h>
 47 #include <net/icmp.h>
 48 #include <net/protocol.h>
 49 #include <net/ip_tunnels.h>
 50 #include <net/arp.h>
 51 #include <net/checksum.h>
 52 #include <net/dsfield.h>
 53 #include <net/inet_ecn.h>
 54 #include <net/xfrm.h>
 55 #include <net/net_namespace.h>
 56 #include <net/netns/generic.h>
 57 #include <net/rtnetlink.h>
 58 #include <net/udp.h>
 59 
 60 #if IS_ENABLED(CONFIG_IPV6)
 61 #include <net/ipv6.h>
 62 #include <net/ip6_fib.h>
 63 #include <net/ip6_route.h>
 64 #endif
 65 
 66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 67 {
 68         return hash_32((__force u32)key ^ (__force u32)remote,
 69                          IP_TNL_HASH_BITS);
 70 }
 71 
/* Publish @dst as the cached route in @idst, releasing whatever route
 * was cached before.  The xchg() makes the pointer swap atomic with
 * respect to concurrent readers of idst->dst.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
                             struct dst_entry *dst, __be32 saddr)
{
        struct dst_entry *old_dst;

        /* Take a reference for the cache before making @dst visible. */
        dst_clone(dst);
        old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
        dst_release(old_dst);
        /* Remember the local address the route was resolved with. */
        idst->saddr = saddr;
}
 82 
 83 static noinline void tunnel_dst_set(struct ip_tunnel *t,
 84                            struct dst_entry *dst, __be32 saddr)
 85 {
 86         __tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
 87 }
 88 
/* Invalidate the current CPU's cached route for tunnel @t. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
        tunnel_dst_set(t, NULL, 0);
}
 93 
 94 void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
 95 {
 96         int i;
 97 
 98         for_each_possible_cpu(i)
 99                 __tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
100 }
101 EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
102 
/* Fetch the per-CPU cached route for @t, validating it first.
 * On success returns a referenced rtable (caller must release it) and
 * stores the cached source address in *saddr; returns NULL when there
 * is no usable cache entry.
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
                                        u32 cookie, __be32 *saddr)
{
        struct ip_tunnel_dst *idst;
        struct dst_entry *dst;

        rcu_read_lock();
        idst = raw_cpu_ptr(t->dst_cache);
        dst = rcu_dereference(idst->dst);
        /* The entry may be dying; only use it if we can take a reference. */
        if (dst && !atomic_inc_not_zero(&dst->__refcnt))
                dst = NULL;
        if (dst) {
                if (!dst->obsolete || dst->ops->check(dst, cookie)) {
                        *saddr = idst->saddr;
                } else {
                        /* Stale route: purge the cache and drop our ref. */
                        tunnel_dst_reset(t);
                        dst_release(dst);
                        dst = NULL;
                }
        }
        rcu_read_unlock();
        return (struct rtable *)dst;
}
126 
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128                                 __be16 flags, __be32 key)
129 {
130         if (p->i_flags & TUNNEL_KEY) {
131                 if (flags & TUNNEL_KEY)
132                         return key == p->i_key;
133                 else
134                         /* key expected, none present */
135                         return false;
136         } else
137                 return !(flags & TUNNEL_KEY);
138 }
139 
140 /* Fallback tunnel: no source, no destination, no key, no options
141 
142    Tunnel hash table:
143    We require an exact key match, i.e. if a key is present in the packet
144    it will match only a tunnel with the same key; if the key is not
145    present, it will match only a keyless tunnel.
146 
147    All keyless packets, if not matched against a configured keyless
148    tunnel, will match the fallback tunnel.
149    Given src, dst and key, find the appropriate tunnel for an input packet.
150 */
/* Find the best tunnel for an incoming packet, in decreasing order of
 * specificity.  Runs under RCU; only considers devices that are up.
 * A tunnel whose parms.link matches the ingress @link wins immediately;
 * otherwise a link-mismatched match is kept as a candidate (@cand).
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
                                   int link, __be16 flags,
                                   __be32 remote, __be32 local,
                                   __be32 key)
{
        unsigned int hash;
        struct ip_tunnel *t, *cand = NULL;
        struct hlist_head *head;

        hash = ip_tunnel_hash(key, remote);
        head = &itn->tunnels[hash];

        /* Pass 1: both endpoints fully specified and matching. */
        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (local != t->parms.iph.saddr ||
                    remote != t->parms.iph.daddr ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else
                        cand = t;
        }

        /* Pass 2: remote matches, tunnel's local end is a wildcard. */
        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (remote != t->parms.iph.daddr ||
                    t->parms.iph.saddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        /* Remaining passes hash with a wildcard remote address. */
        hash = ip_tunnel_hash(key, 0);
        head = &itn->tunnels[hash];

        /* Pass 3: our local address is the tunnel's source (with no
         * remote configured), or it is the tunnel's multicast
         * destination.
         */
        hlist_for_each_entry_rcu(t, head, hash_node) {
                if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
                    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
                        continue;

                if (!(t->dev->flags & IFF_UP))
                        continue;

                if (!ip_tunnel_key_match(&t->parms, flags, key))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

        if (flags & TUNNEL_NO_KEY)
                goto skip_key_lookup;

        /* Pass 4: match on key alone, both addresses wildcarded. */
        hlist_for_each_entry_rcu(t, head, hash_node) {
                if (t->parms.i_key != key ||
                    t->parms.iph.saddr != 0 ||
                    t->parms.iph.daddr != 0 ||
                    !(t->dev->flags & IFF_UP))
                        continue;

                if (t->parms.link == link)
                        return t;
                else if (!cand)
                        cand = t;
        }

skip_key_lookup:
        if (cand)
                return cand;

        /* Last resort: the catch-all fallback device, if it is up. */
        if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
                return netdev_priv(itn->fb_tunnel_dev);


        return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240 
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242                                     struct ip_tunnel_parm *parms)
243 {
244         unsigned int h;
245         __be32 remote;
246         __be32 i_key = parms->i_key;
247 
248         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249                 remote = parms->iph.daddr;
250         else
251                 remote = 0;
252 
253         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254                 i_key = 0;
255 
256         h = ip_tunnel_hash(i_key, remote);
257         return &itn->tunnels[h];
258 }
259 
260 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
261 {
262         struct hlist_head *head = ip_bucket(itn, &t->parms);
263 
264         hlist_add_head_rcu(&t->hash_node, head);
265 }
266 
/* Unhash @t; concurrent RCU lookups may still see it until a grace
 * period elapses.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
        hlist_del_init_rcu(&t->hash_node);
}
271 
272 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
273                                         struct ip_tunnel_parm *parms,
274                                         int type)
275 {
276         __be32 remote = parms->iph.daddr;
277         __be32 local = parms->iph.saddr;
278         __be32 key = parms->i_key;
279         __be16 flags = parms->i_flags;
280         int link = parms->link;
281         struct ip_tunnel *t = NULL;
282         struct hlist_head *head = ip_bucket(itn, parms);
283 
284         hlist_for_each_entry_rcu(t, head, hash_node) {
285                 if (local == t->parms.iph.saddr &&
286                     remote == t->parms.iph.daddr &&
287                     link == t->parms.link &&
288                     type == t->dev->type &&
289                     ip_tunnel_key_match(&t->parms, flags, key))
290                         break;
291         }
292         return t;
293 }
294 
/* Allocate and register a tunnel net_device in @net using @ops.
 * The device name comes from @parms->name, or falls back to
 * "<kind>%d" for kernel auto-numbering.  Must run under RTNL.
 * Returns the new device or an ERR_PTR().
 */
static struct net_device *__ip_tunnel_create(struct net *net,
                                             const struct rtnl_link_ops *ops,
                                             struct ip_tunnel_parm *parms)
{
        int err;
        struct ip_tunnel *tunnel;
        struct net_device *dev;
        char name[IFNAMSIZ];

        if (parms->name[0])
                strlcpy(name, parms->name, IFNAMSIZ);
        else {
                /* Need room for "%d" plus the terminating NUL. */
                if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
                        err = -E2BIG;
                        goto failed;
                }
                strlcpy(name, ops->kind, IFNAMSIZ);
                strncat(name, "%d", 2);
        }

        ASSERT_RTNL();
        dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
        if (!dev) {
                err = -ENOMEM;
                goto failed;
        }
        dev_net_set(dev, net);

        dev->rtnl_link_ops = ops;

        tunnel = netdev_priv(dev);
        tunnel->parms = *parms;
        tunnel->net = net;

        err = register_netdevice(dev);
        if (err)
                goto failed_free;

        return dev;

failed_free:
        free_netdev(dev);
failed:
        return ERR_PTR(err);
}
340 
341 static inline void init_tunnel_flow(struct flowi4 *fl4,
342                                     int proto,
343                                     __be32 daddr, __be32 saddr,
344                                     __be32 key, __u8 tos, int oif)
345 {
346         memset(fl4, 0, sizeof(*fl4));
347         fl4->flowi4_oif = oif;
348         fl4->daddr = daddr;
349         fl4->saddr = saddr;
350         fl4->flowi4_tos = tos;
351         fl4->flowi4_proto = proto;
352         fl4->fl4_gre_key = key;
353 }
354 
/* Derive the tunnel device's MTU and needed headroom by resolving the
 * route to the configured remote (or by the explicitly configured
 * link).  Returns the MTU the tunnel device should use, never below 68.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
        struct net_device *tdev = NULL;
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *iph;
        int hlen = LL_MAX_HEADER;
        int mtu = ETH_DATA_LEN;
        int t_hlen = tunnel->hlen + sizeof(struct iphdr);

        iph = &tunnel->parms.iph;

        /* Guess output device to choose reasonable mtu and needed_headroom */
        if (iph->daddr) {
                struct flowi4 fl4;
                struct rtable *rt;

                init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
                                 iph->saddr, tunnel->parms.o_key,
                                 RT_TOS(iph->tos), tunnel->parms.link);
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (!IS_ERR(rt)) {
                        tdev = rt->dst.dev;
                        /* Prime the per-CPU route cache while we're here. */
                        tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
                        ip_rt_put(rt);
                }
                if (dev->type != ARPHRD_ETHER)
                        dev->flags |= IFF_POINTOPOINT;
        }

        /* No route found: fall back to the configured underlying link. */
        if (!tdev && tunnel->parms.link)
                tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

        if (tdev) {
                hlen = tdev->hard_header_len + tdev->needed_headroom;
                mtu = tdev->mtu;
        }
        dev->iflink = tunnel->parms.link;

        dev->needed_headroom = t_hlen + hlen;
        mtu -= (dev->hard_header_len + t_hlen);

        /* 68 is the minimum IPv4 MTU (RFC 791). */
        if (mtu < 68)
                mtu = 68;

        return mtu;
}
402 
403 static struct ip_tunnel *ip_tunnel_create(struct net *net,
404                                           struct ip_tunnel_net *itn,
405                                           struct ip_tunnel_parm *parms)
406 {
407         struct ip_tunnel *nt;
408         struct net_device *dev;
409 
410         BUG_ON(!itn->fb_tunnel_dev);
411         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
412         if (IS_ERR(dev))
413                 return ERR_CAST(dev);
414 
415         dev->mtu = ip_tunnel_bind_dev(dev);
416 
417         nt = netdev_priv(dev);
418         ip_tunnel_add(itn, nt);
419         return nt;
420 }
421 
/* Common receive path for IP tunnels.  Validates the packet's
 * checksum/sequence flags (@tpi) against the tunnel configuration,
 * undoes ECN encapsulation, updates stats and hands the packet to
 * GRO.  Consumes @skb in all cases; always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
                  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
        struct pcpu_sw_netstats *tstats;
        const struct iphdr *iph = ip_hdr(skb);
        int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
        if (ipv4_is_multicast(iph->daddr)) {
                tunnel->dev->stats.multicast++;
                skb->pkt_type = PACKET_BROADCAST;
        }
#endif

        /* Packet and tunnel must agree on whether a checksum is used. */
        if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
             ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
                tunnel->dev->stats.rx_crc_errors++;
                tunnel->dev->stats.rx_errors++;
                goto drop;
        }

        /* Enforce in-order delivery when sequence numbers are enabled. */
        if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                if (!(tpi->flags&TUNNEL_SEQ) ||
                    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
                        tunnel->dev->stats.rx_fifo_errors++;
                        tunnel->dev->stats.rx_errors++;
                        goto drop;
                }
                tunnel->i_seqno = ntohl(tpi->seq) + 1;
        }

        skb_reset_network_header(skb);

        err = IP_ECN_decapsulate(iph, skb);
        if (unlikely(err)) {
                if (log_ecn_error)
                        net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
                                        &iph->saddr, iph->tos);
                /* err > 1: the frame must be dropped, not just logged. */
                if (err > 1) {
                        ++tunnel->dev->stats.rx_frame_errors;
                        ++tunnel->dev->stats.rx_errors;
                        goto drop;
                }
        }

        tstats = this_cpu_ptr(tunnel->dev->tstats);
        u64_stats_update_begin(&tstats->syncp);
        tstats->rx_packets++;
        tstats->rx_bytes += skb->len;
        u64_stats_update_end(&tstats->syncp);

        /* Scrub packet state when crossing netns boundaries. */
        skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

        if (tunnel->dev->type == ARPHRD_ETHER) {
                skb->protocol = eth_type_trans(skb, tunnel->dev);
                skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
        } else {
                skb->dev = tunnel->dev;
        }

        gro_cells_receive(&tunnel->gro_cells, skb);
        return 0;

drop:
        kfree_skb(skb);
        return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
490 
491 static int ip_encap_hlen(struct ip_tunnel_encap *e)
492 {
493         const struct ip_tunnel_encap_ops *ops;
494         int hlen = -EINVAL;
495 
496         if (e->type == TUNNEL_ENCAP_NONE)
497                 return 0;
498 
499         if (e->type >= MAX_IPTUN_ENCAP_OPS)
500                 return -EINVAL;
501 
502         rcu_read_lock();
503         ops = rcu_dereference(iptun_encaps[e->type]);
504         if (likely(ops && ops->encap_hlen))
505                 hlen = ops->encap_hlen(e);
506         rcu_read_unlock();
507 
508         return hlen;
509 }
510 
/* Table of registered encapsulation ops, indexed by encap type.
 * Entries are published/removed with cmpxchg() and read under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
                iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
513 
/* Register encapsulation ops in slot @num.  Returns 0 on success,
 * -ERANGE for an out-of-range slot, or -1 if the slot is taken.
 */
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        /* cmpxchg() succeeds only if the slot was still empty. */
        return !cmpxchg((const struct ip_tunnel_encap_ops **)
                        &iptun_encaps[num],
                        NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
525 
/* Unregister encapsulation ops from slot @num; only succeeds if the
 * slot currently holds @ops.  Waits out concurrent RCU readers before
 * returning.  Returns 0 on success, -ERANGE or -1 on failure.
 */
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
                            unsigned int num)
{
        int ret;

        if (num >= MAX_IPTUN_ENCAP_OPS)
                return -ERANGE;

        ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
                       &iptun_encaps[num],
                       ops, NULL) == ops) ? 0 : -1;

        /* Ensure no RCU reader can still be using @ops. */
        synchronize_net();

        return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
543 
544 int ip_tunnel_encap_setup(struct ip_tunnel *t,
545                           struct ip_tunnel_encap *ipencap)
546 {
547         int hlen;
548 
549         memset(&t->encap, 0, sizeof(t->encap));
550 
551         hlen = ip_encap_hlen(ipencap);
552         if (hlen < 0)
553                 return hlen;
554 
555         t->encap.type = ipencap->type;
556         t->encap.sport = ipencap->sport;
557         t->encap.dport = ipencap->dport;
558         t->encap.flags = ipencap->flags;
559 
560         t->encap_hlen = hlen;
561         t->hlen = t->encap_hlen + t->tun_hlen;
562 
563         return 0;
564 }
565 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
566 
567 int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
568                     u8 *protocol, struct flowi4 *fl4)
569 {
570         const struct ip_tunnel_encap_ops *ops;
571         int ret = -EINVAL;
572 
573         if (t->encap.type == TUNNEL_ENCAP_NONE)
574                 return 0;
575 
576         if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
577                 return -EINVAL;
578 
579         rcu_read_lock();
580         ops = rcu_dereference(iptun_encaps[t->encap.type]);
581         if (likely(ops && ops->build_header))
582                 ret = ops->build_header(skb, &t->encap, protocol, fl4);
583         rcu_read_unlock();
584 
585         return ret;
586 }
587 EXPORT_SYMBOL(ip_tunnel_encap);
588 
/* Check and propagate path MTU for a packet about to be tunnelled via
 * @rt.  Sends the appropriate "packet too big" error back to the
 * sender and returns -E2BIG when the inner packet does not fit;
 * returns 0 otherwise.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                            struct rtable *rt, __be16 df)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
        int mtu;

        if (df)
                /* DF set: bounded by the outer route minus tunnel overhead. */
                mtu = dst_mtu(&rt->dst) - dev->hard_header_len
                                        - sizeof(struct iphdr) - tunnel->hlen;
        else
                mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

        if (skb_dst(skb))
                skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

        if (skb->protocol == htons(ETH_P_IP)) {
                if (!skb_is_gso(skb) &&
                    (df & htons(IP_DF)) && mtu < pkt_size) {
                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
#if IS_ENABLED(CONFIG_IPV6)
        else if (skb->protocol == htons(ETH_P_IPV6)) {
                struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

                /* Record the lowered MTU on the IPv6 route when the
                 * route is host-specific or the tunnel has a fixed
                 * unicast endpoint.
                 */
                if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
                           mtu >= IPV6_MIN_MTU) {
                        if ((tunnel->parms.iph.daddr &&
                            !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
                            rt6->rt6i_dst.plen == 128) {
                                rt6->rt6i_flags |= RTF_MODIFIED;
                                dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
                        }
                }

                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                                        mtu < pkt_size) {
                        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
#endif
        return 0;
}
636 
/* Common transmit path for IP tunnels.  Resolves the outer route
 * (using the per-CPU route cache when the tunnel has a fixed
 * destination), handles NBMA destination discovery, PMTU and TOS/TTL
 * inheritance, then emits the encapsulated packet.  Consumes @skb.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
                    const struct iphdr *tnl_params, u8 protocol)
{
        struct ip_tunnel *tunnel = netdev_priv(dev);
        const struct iphdr *inner_iph;
        struct flowi4 fl4;
        u8     tos, ttl;
        __be16 df;
        struct rtable *rt;              /* Route to the other host */
        unsigned int max_headroom;      /* The extra header space needed */
        __be32 dst;
        int err;
        bool connected;

        inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
        connected = (tunnel->parms.iph.daddr != 0);

        dst = tnl_params->daddr;
        if (dst == 0) {
                /* NBMA tunnel: derive the outer destination per packet. */

                if (skb_dst(skb) == NULL) {
                        dev->stats.tx_fifo_errors++;
                        goto tx_error;
                }

                if (skb->protocol == htons(ETH_P_IP)) {
                        rt = skb_rtable(skb);
                        dst = rt_nexthop(rt, inner_iph->daddr);
                }
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6)) {
                        const struct in6_addr *addr6;
                        struct neighbour *neigh;
                        bool do_tx_error_icmp;
                        int addr_type;

                        neigh = dst_neigh_lookup(skb_dst(skb),
                                                 &ipv6_hdr(skb)->daddr);
                        if (neigh == NULL)
                                goto tx_error;

                        addr6 = (const struct in6_addr *)&neigh->primary_key;
                        addr_type = ipv6_addr_type(addr6);

                        if (addr_type == IPV6_ADDR_ANY) {
                                addr6 = &ipv6_hdr(skb)->daddr;
                                addr_type = ipv6_addr_type(addr6);
                        }

                        /* Only a v4-compatible IPv6 address yields an
                         * IPv4 tunnel endpoint (its low 32 bits).
                         */
                        if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
                                do_tx_error_icmp = true;
                        else {
                                do_tx_error_icmp = false;
                                dst = addr6->s6_addr32[3];
                        }
                        neigh_release(neigh);
                        if (do_tx_error_icmp)
                                goto tx_error_icmp;
                }
#endif
                else
                        goto tx_error;

                /* Destination varies per packet: skip the route cache. */
                connected = false;
        }

        tos = tnl_params->tos;
        if (tos & 0x1) {
                /* Low bit set: inherit TOS from the inner packet. */
                tos &= ~0x1;
                if (skb->protocol == htons(ETH_P_IP)) {
                        tos = inner_iph->tos;
                        connected = false;
                } else if (skb->protocol == htons(ETH_P_IPV6)) {
                        tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
                        connected = false;
                }
        }

        init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
                         tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

        if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
                goto tx_error;

        /* Try the per-CPU cached route first for fixed destinations. */
        rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

        if (!rt) {
                rt = ip_route_output_key(tunnel->net, &fl4);

                if (IS_ERR(rt)) {
                        dev->stats.tx_carrier_errors++;
                        goto tx_error;
                }
                if (connected)
                        tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
        }

        /* Routing back through ourselves would loop forever. */
        if (rt->dst.dev == dev) {
                ip_rt_put(rt);
                dev->stats.collisions++;
                goto tx_error;
        }

        if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
                ip_rt_put(rt);
                goto tx_error;
        }

        /* Report recent tunnel errors back to local senders for a
         * while after they were observed.
         */
        if (tunnel->err_count > 0) {
                if (time_before(jiffies,
                                tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;

                        memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                        dst_link_failure(skb);
                } else
                        tunnel->err_count = 0;
        }

        tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
        ttl = tnl_params->ttl;
        if (ttl == 0) {
                /* TTL 0: inherit from the inner packet. */
                if (skb->protocol == htons(ETH_P_IP))
                        ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
                else
                        ttl = ip4_dst_hoplimit(&rt->dst);
        }

        df = tnl_params->frag_off;
        if (skb->protocol == htons(ETH_P_IP))
                df |= (inner_iph->frag_off&htons(IP_DF));

        max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
                        + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
        if (max_headroom > dev->needed_headroom)
                dev->needed_headroom = max_headroom;

        if (skb_cow_head(skb, dev->needed_headroom)) {
                ip_rt_put(rt);
                dev->stats.tx_dropped++;
                kfree_skb(skb);
                return;
        }

        err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol,
                            tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
        iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

        return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
        dst_link_failure(skb);
#endif
tx_error:
        dev->stats.tx_errors++;
        kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
801 
/* Re-key/re-address an existing tunnel @t with the parameters in @p.
 * The tunnel must be unhashed and re-hashed because addresses and key
 * determine its bucket.  Caches are flushed and userspace notified.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
                             struct ip_tunnel *t,
                             struct net_device *dev,
                             struct ip_tunnel_parm *p,
                             bool set_mtu)
{
        ip_tunnel_del(t);
        t->parms.iph.saddr = p->iph.saddr;
        t->parms.iph.daddr = p->iph.daddr;
        t->parms.i_key = p->i_key;
        t->parms.o_key = p->o_key;
        if (dev->type != ARPHRD_ETHER) {
                /* Non-Ethernet tunnels expose endpoints as dev addresses. */
                memcpy(dev->dev_addr, &p->iph.saddr, 4);
                memcpy(dev->broadcast, &p->iph.daddr, 4);
        }
        ip_tunnel_add(itn, t);

        t->parms.iph.ttl = p->iph.ttl;
        t->parms.iph.tos = p->iph.tos;
        t->parms.iph.frag_off = p->iph.frag_off;

        if (t->parms.link != p->link) {
                int mtu;

                /* Underlying link changed: rebind and optionally adopt
                 * the newly computed MTU.
                 */
                t->parms.link = p->link;
                mtu = ip_tunnel_bind_dev(dev);
                if (set_mtu)
                        dev->mtu = mtu;
        }
        /* Cached routes may now point at the wrong place. */
        ip_tunnel_dst_reset_all(t);
        netdev_state_change(dev);
}
834 
/* Handle the classic tunnel ioctls (SIOCGETTUNNEL, SIOCADDTUNNEL,
 * SIOCCHGTUNNEL, SIOCDELTUNNEL) for an IPv4 tunnel device.  @p is the
 * parameter block already copied in from userspace by the caller and is
 * written back in place for SIOCGETTUNNEL.  Returns 0 or -errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t = netdev_priv(dev);
	struct net *net = t->net;
	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device, resolve the tunnel matching @p;
		 * fall back to the fallback device's own parms if no match.
		 */
		if (dev == itn->fb_tunnel_dev) {
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				t = netdev_priv(dev);
		}
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL implies path-MTU discovery, so force DF. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* For non-VTI tunnels a key is only meaningful when the
		 * matching TUNNEL_KEY flag is set; zero stale key values
		 * so lookups below compare cleanly.
		 */
		if (!(p->i_flags & VTI_ISVTI)) {
			if (!(p->i_flags & TUNNEL_KEY))
				p->i_key = 0;
			if (!(p->o_flags & TUNNEL_KEY))
				p->o_key = 0;
		}

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (cmd == SIOCADDTUNNEL) {
			/* Add: create a fresh device unless the requested
			 * parameters already match an existing tunnel.
			 */
			if (!t) {
				t = ip_tunnel_create(net, itn, p);
				err = PTR_ERR_OR_ZERO(t);
				break;
			}

			err = -EEXIST;
			break;
		}
		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* @p resolves to a tunnel: it must be the
				 * very device being changed, not a sibling.
				 */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* No match: the new addresses must not flip
				 * the device between broadcast and
				 * point-to-point mode.
				 */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else {
			err = -ENOENT;
		}
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			/* Deleting via the fallback device: look up the
			 * target from @p.  The fallback device itself may
			 * never be deleted this way.
			 */
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
937 
938 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
939 {
940         struct ip_tunnel *tunnel = netdev_priv(dev);
941         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
942 
943         if (new_mtu < 68 ||
944             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
945                 return -EINVAL;
946         dev->mtu = new_mtu;
947         return 0;
948 }
949 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
950 
951 static void ip_tunnel_dev_free(struct net_device *dev)
952 {
953         struct ip_tunnel *tunnel = netdev_priv(dev);
954 
955         gro_cells_destroy(&tunnel->gro_cells);
956         free_percpu(tunnel->dst_cache);
957         free_percpu(dev->tstats);
958         free_netdev(dev);
959 }
960 
961 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
962 {
963         struct ip_tunnel *tunnel = netdev_priv(dev);
964         struct ip_tunnel_net *itn;
965 
966         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
967 
968         if (itn->fb_tunnel_dev != dev) {
969                 ip_tunnel_del(netdev_priv(dev));
970                 unregister_netdevice_queue(dev, head);
971         }
972 }
973 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
974 
975 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
976 {
977         struct ip_tunnel *tunnel = netdev_priv(dev);
978 
979         return tunnel->net;
980 }
981 EXPORT_SYMBOL(ip_tunnel_get_link_net);
982 
983 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
984                                   struct rtnl_link_ops *ops, char *devname)
985 {
986         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
987         struct ip_tunnel_parm parms;
988         unsigned int i;
989 
990         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
991                 INIT_HLIST_HEAD(&itn->tunnels[i]);
992 
993         if (!ops) {
994                 itn->fb_tunnel_dev = NULL;
995                 return 0;
996         }
997 
998         memset(&parms, 0, sizeof(parms));
999         if (devname)
1000                 strlcpy(parms.name, devname, IFNAMSIZ);
1001 
1002         rtnl_lock();
1003         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1004         /* FB netdevice is special: we have one, and only one per netns.
1005          * Allowing to move it to another netns is clearly unsafe.
1006          */
1007         if (!IS_ERR(itn->fb_tunnel_dev)) {
1008                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1009                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1010                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1011         }
1012         rtnl_unlock();
1013 
1014         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1015 }
1016 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1017 
1018 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1019                               struct rtnl_link_ops *ops)
1020 {
1021         struct net *net = dev_net(itn->fb_tunnel_dev);
1022         struct net_device *dev, *aux;
1023         int h;
1024 
1025         for_each_netdev_safe(net, dev, aux)
1026                 if (dev->rtnl_link_ops == ops)
1027                         unregister_netdevice_queue(dev, head);
1028 
1029         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1030                 struct ip_tunnel *t;
1031                 struct hlist_node *n;
1032                 struct hlist_head *thead = &itn->tunnels[h];
1033 
1034                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1035                         /* If dev is in the same netns, it has already
1036                          * been added to the list by the previous loop.
1037                          */
1038                         if (!net_eq(dev_net(t->dev), net))
1039                                 unregister_netdevice_queue(t->dev, head);
1040         }
1041 }
1042 
1043 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1044 {
1045         LIST_HEAD(list);
1046 
1047         rtnl_lock();
1048         ip_tunnel_destroy(itn, &list, ops);
1049         unregister_netdevice_many(&list);
1050         rtnl_unlock();
1051 }
1052 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1053 
1054 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1055                       struct ip_tunnel_parm *p)
1056 {
1057         struct ip_tunnel *nt;
1058         struct net *net = dev_net(dev);
1059         struct ip_tunnel_net *itn;
1060         int mtu;
1061         int err;
1062 
1063         nt = netdev_priv(dev);
1064         itn = net_generic(net, nt->ip_tnl_net_id);
1065 
1066         if (ip_tunnel_find(itn, p, dev->type))
1067                 return -EEXIST;
1068 
1069         nt->net = net;
1070         nt->parms = *p;
1071         err = register_netdevice(dev);
1072         if (err)
1073                 goto out;
1074 
1075         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1076                 eth_hw_addr_random(dev);
1077 
1078         mtu = ip_tunnel_bind_dev(dev);
1079         if (!tb[IFLA_MTU])
1080                 dev->mtu = mtu;
1081 
1082         ip_tunnel_add(itn, nt);
1083 
1084 out:
1085         return err;
1086 }
1087 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1088 
1089 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1090                          struct ip_tunnel_parm *p)
1091 {
1092         struct ip_tunnel *t;
1093         struct ip_tunnel *tunnel = netdev_priv(dev);
1094         struct net *net = tunnel->net;
1095         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1096 
1097         if (dev == itn->fb_tunnel_dev)
1098                 return -EINVAL;
1099 
1100         t = ip_tunnel_find(itn, p, dev->type);
1101 
1102         if (t) {
1103                 if (t->dev != dev)
1104                         return -EEXIST;
1105         } else {
1106                 t = tunnel;
1107 
1108                 if (dev->type != ARPHRD_ETHER) {
1109                         unsigned int nflags = 0;
1110 
1111                         if (ipv4_is_multicast(p->iph.daddr))
1112                                 nflags = IFF_BROADCAST;
1113                         else if (p->iph.daddr)
1114                                 nflags = IFF_POINTOPOINT;
1115 
1116                         if ((dev->flags ^ nflags) &
1117                             (IFF_POINTOPOINT | IFF_BROADCAST))
1118                                 return -EINVAL;
1119                 }
1120         }
1121 
1122         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1123         return 0;
1124 }
1125 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1126 
1127 int ip_tunnel_init(struct net_device *dev)
1128 {
1129         struct ip_tunnel *tunnel = netdev_priv(dev);
1130         struct iphdr *iph = &tunnel->parms.iph;
1131         int err;
1132 
1133         dev->destructor = ip_tunnel_dev_free;
1134         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1135         if (!dev->tstats)
1136                 return -ENOMEM;
1137 
1138         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1139         if (!tunnel->dst_cache) {
1140                 free_percpu(dev->tstats);
1141                 return -ENOMEM;
1142         }
1143 
1144         err = gro_cells_init(&tunnel->gro_cells, dev);
1145         if (err) {
1146                 free_percpu(tunnel->dst_cache);
1147                 free_percpu(dev->tstats);
1148                 return err;
1149         }
1150 
1151         tunnel->dev = dev;
1152         tunnel->net = dev_net(dev);
1153         strcpy(tunnel->parms.name, dev->name);
1154         iph->version            = 4;
1155         iph->ihl                = 5;
1156 
1157         return 0;
1158 }
1159 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1160 
1161 void ip_tunnel_uninit(struct net_device *dev)
1162 {
1163         struct ip_tunnel *tunnel = netdev_priv(dev);
1164         struct net *net = tunnel->net;
1165         struct ip_tunnel_net *itn;
1166 
1167         itn = net_generic(net, tunnel->ip_tnl_net_id);
1168         /* fb_tunnel_dev will be unregisted in net-exit call. */
1169         if (itn->fb_tunnel_dev != dev)
1170                 ip_tunnel_del(netdev_priv(dev));
1171 
1172         ip_tunnel_dst_reset_all(tunnel);
1173 }
1174 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1175 
1176 /* Do least required initialization, rest of init is done in tunnel_init call */
1177 void ip_tunnel_setup(struct net_device *dev, int net_id)
1178 {
1179         struct ip_tunnel *tunnel = netdev_priv(dev);
1180         tunnel->ip_tnl_net_id = net_id;
1181 }
1182 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1183 
1184 MODULE_LICENSE("GPL");
1185 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp