~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_tunnel.c

Version: ~ [ linux-5.3-rc5 ] ~ [ linux-5.2.9 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.67 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.139 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.189 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.189 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.72 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2013 Nicira, Inc.
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of version 2 of the GNU General Public
  6  * License as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope that it will be useful, but
  9  * WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11  * General Public License for more details.
 12  *
 13  * You should have received a copy of the GNU General Public License
 14  * along with this program; if not, write to the Free Software
 15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 16  * 02110-1301, USA
 17  */
 18 
 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20 
 21 #include <linux/capability.h>
 22 #include <linux/module.h>
 23 #include <linux/types.h>
 24 #include <linux/kernel.h>
 25 #include <linux/slab.h>
 26 #include <linux/uaccess.h>
 27 #include <linux/skbuff.h>
 28 #include <linux/netdevice.h>
 29 #include <linux/in.h>
 30 #include <linux/tcp.h>
 31 #include <linux/udp.h>
 32 #include <linux/if_arp.h>
 33 #include <linux/mroute.h>
 34 #include <linux/init.h>
 35 #include <linux/in6.h>
 36 #include <linux/inetdevice.h>
 37 #include <linux/igmp.h>
 38 #include <linux/netfilter_ipv4.h>
 39 #include <linux/etherdevice.h>
 40 #include <linux/if_ether.h>
 41 #include <linux/if_vlan.h>
 42 #include <linux/rculist.h>
 43 #include <linux/err.h>
 44 
 45 #include <net/sock.h>
 46 #include <net/ip.h>
 47 #include <net/icmp.h>
 48 #include <net/protocol.h>
 49 #include <net/ip_tunnels.h>
 50 #include <net/arp.h>
 51 #include <net/checksum.h>
 52 #include <net/dsfield.h>
 53 #include <net/inet_ecn.h>
 54 #include <net/xfrm.h>
 55 #include <net/net_namespace.h>
 56 #include <net/netns/generic.h>
 57 #include <net/rtnetlink.h>
 58 #include <net/udp.h>
 59 
 60 #if IS_ENABLED(CONFIG_IPV6)
 61 #include <net/ipv6.h>
 62 #include <net/ip6_fib.h>
 63 #include <net/ip6_route.h>
 64 #endif
 65 
 66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 67 {
 68         return hash_32((__force u32)key ^ (__force u32)remote,
 69                          IP_TNL_HASH_BITS);
 70 }
 71 
/* Replace the cached output route in @idst with @dst (which may be
 * NULL to invalidate the slot), releasing the reference held on the
 * previously cached entry.
 */
static void __tunnel_dst_set(struct ip_tunnel_dst *idst,
			     struct dst_entry *dst, __be32 saddr)
{
	struct dst_entry *old_dst;

	/* Take a reference for the cache before publishing the entry. */
	dst_clone(dst);
	/* xchg() makes the pointer swap atomic against concurrent
	 * updaters of this slot.
	 */
	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst);
	dst_release(old_dst);
	idst->saddr = saddr;
}
 82 
/* Cache @dst/@saddr in the current CPU's slot of the tunnel's per-cpu
 * dst cache.
 */
static noinline void tunnel_dst_set(struct ip_tunnel *t,
			   struct dst_entry *dst, __be32 saddr)
{
	__tunnel_dst_set(raw_cpu_ptr(t->dst_cache), dst, saddr);
}
 88 
/* Invalidate the current CPU's cached route for this tunnel. */
static void tunnel_dst_reset(struct ip_tunnel *t)
{
	tunnel_dst_set(t, NULL, 0);
}
 93 
/* Invalidate the cached route in every possible CPU's slot, e.g. after
 * a change that makes all cached routes stale.
 */
void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
	int i;

	for_each_possible_cpu(i)
		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL, 0);
}
EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
102 
/* Fetch the cached route for the current CPU, taking a reference the
 * caller must drop.  On success *saddr is set to the cached source
 * address.  Returns NULL if the slot is empty, the entry is being
 * freed, or the route is obsolete (in which case the cache is reset).
 */
static struct rtable *tunnel_rtable_get(struct ip_tunnel *t,
					u32 cookie, __be32 *saddr)
{
	struct ip_tunnel_dst *idst;
	struct dst_entry *dst;

	rcu_read_lock();
	idst = raw_cpu_ptr(t->dst_cache);
	dst = rcu_dereference(idst->dst);
	/* A zero refcount means the dst is already on its way to being
	 * freed; treat the slot as empty rather than reviving it.
	 */
	if (dst && !atomic_inc_not_zero(&dst->__refcnt))
		dst = NULL;
	if (dst) {
		if (!dst->obsolete || dst->ops->check(dst, cookie)) {
			*saddr = idst->saddr;
		} else {
			/* Stale route: drop both the cache's and our
			 * freshly taken reference.
			 */
			tunnel_dst_reset(t);
			dst_release(dst);
			dst = NULL;
		}
	}
	rcu_read_unlock();
	return (struct rtable *)dst;
}
126 
127 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
128                                 __be16 flags, __be32 key)
129 {
130         if (p->i_flags & TUNNEL_KEY) {
131                 if (flags & TUNNEL_KEY)
132                         return key == p->i_key;
133                 else
134                         /* key expected, none present */
135                         return false;
136         } else
137                 return !(flags & TUNNEL_KEY);
138 }
139 
140 /* Fallback tunnel: no source, no destination, no key, no options
141 
142    Tunnel hash table:
143    We require exact key match i.e. if a key is present in packet
144    it will match only tunnel with the same key; if it is not present,
145    it will match only keyless tunnel.
146 
   All keyless packets, if not matched against configured keyless tunnels,
   will match the fallback tunnel.
149    Given src, dst and key, find appropriate for input tunnel.
150 */
/* Find the tunnel that should receive a packet with the given source,
 * destination, flags and key, on device index @link.  Candidates are
 * tried in order of decreasing address specificity; within each pass a
 * tunnel on a different link is remembered as a fallback candidate.
 */
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
				   int link, __be16 flags,
				   __be32 remote, __be32 local,
				   __be32 key)
{
	unsigned int hash;
	struct ip_tunnel *t, *cand = NULL;
	struct hlist_head *head;

	hash = ip_tunnel_hash(key, remote);
	head = &itn->tunnels[hash];

	/* Pass 1: both endpoints match exactly. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local != t->parms.iph.saddr ||
		    remote != t->parms.iph.daddr ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else
			/* Right addresses, wrong link: keep as fallback. */
			cand = t;
	}

	/* Pass 2: remote matches, tunnel source is a wildcard. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (remote != t->parms.iph.daddr ||
		    t->parms.iph.saddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	/* Passes 3 and 4 hash with a wildcard remote. */
	hash = ip_tunnel_hash(key, 0);
	head = &itn->tunnels[hash];

	/* Pass 3: tunnel source matches our local address (with
	 * wildcard destination), or the tunnel destination is our
	 * (multicast) local address.
	 */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
			continue;

		if (!(t->dev->flags & IFF_UP))
			continue;

		if (!ip_tunnel_key_match(&t->parms, flags, key))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

	if (flags & TUNNEL_NO_KEY)
		goto skip_key_lookup;

	/* Pass 4: key-only match, both addresses wildcarded. */
	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (t->parms.i_key != key ||
		    t->parms.iph.saddr != 0 ||
		    t->parms.iph.daddr != 0 ||
		    !(t->dev->flags & IFF_UP))
			continue;

		if (t->parms.link == link)
			return t;
		else if (!cand)
			cand = t;
	}

skip_key_lookup:
	if (cand)
		return cand;

	/* Last resort: the namespace's fallback device, if up. */
	if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
		return netdev_priv(itn->fb_tunnel_dev);


	return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
240 
241 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
242                                     struct ip_tunnel_parm *parms)
243 {
244         unsigned int h;
245         __be32 remote;
246         __be32 i_key = parms->i_key;
247 
248         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
249                 remote = parms->iph.daddr;
250         else
251                 remote = 0;
252 
253         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
254                 i_key = 0;
255 
256         h = ip_tunnel_hash(i_key, remote);
257         return &itn->tunnels[h];
258 }
259 
/* Insert @t into the hash bucket selected by its current parameters.
 * Caller must hold the appropriate lock (RCU-safe insert).
 */
static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
{
	struct hlist_head *head = ip_bucket(itn, &t->parms);

	hlist_add_head_rcu(&t->hash_node, head);
}
266 
/* Unhash @t (RCU-safe removal); the node is re-initialized so it can
 * be re-added later, e.g. by ip_tunnel_update().
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
271 
/* Look up a tunnel whose configuration matches @parms exactly
 * (addresses, link, device @type and key semantics).  Returns NULL if
 * none exists.  Unlike ip_tunnel_lookup(), this matches configured
 * parameters, not an incoming packet.
 */
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
					struct ip_tunnel_parm *parms,
					int type)
{
	__be32 remote = parms->iph.daddr;
	__be32 local = parms->iph.saddr;
	__be32 key = parms->i_key;
	__be16 flags = parms->i_flags;
	int link = parms->link;
	struct ip_tunnel *t = NULL;
	struct hlist_head *head = ip_bucket(itn, parms);

	hlist_for_each_entry_rcu(t, head, hash_node) {
		if (local == t->parms.iph.saddr &&
		    remote == t->parms.iph.daddr &&
		    link == t->parms.link &&
		    type == t->dev->type &&
		    ip_tunnel_key_match(&t->parms, flags, key))
			break;
	}
	/* t is NULL here if the loop ran to completion without a match. */
	return t;
}
294 
295 static struct net_device *__ip_tunnel_create(struct net *net,
296                                              const struct rtnl_link_ops *ops,
297                                              struct ip_tunnel_parm *parms)
298 {
299         int err;
300         struct ip_tunnel *tunnel;
301         struct net_device *dev;
302         char name[IFNAMSIZ];
303 
304         if (parms->name[0])
305                 strlcpy(name, parms->name, IFNAMSIZ);
306         else {
307                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
308                         err = -E2BIG;
309                         goto failed;
310                 }
311                 strlcpy(name, ops->kind, IFNAMSIZ);
312                 strncat(name, "%d", 2);
313         }
314 
315         ASSERT_RTNL();
316         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
317         if (!dev) {
318                 err = -ENOMEM;
319                 goto failed;
320         }
321         dev_net_set(dev, net);
322 
323         dev->rtnl_link_ops = ops;
324 
325         tunnel = netdev_priv(dev);
326         tunnel->parms = *parms;
327         tunnel->net = net;
328 
329         err = register_netdevice(dev);
330         if (err)
331                 goto failed_free;
332 
333         return dev;
334 
335 failed_free:
336         free_netdev(dev);
337 failed:
338         return ERR_PTR(err);
339 }
340 
/* Initialize a flowi4 routing key for the tunnel's outer header.
 * The struct is fully zeroed first so unset fields (mark, uid, ...)
 * are deterministic.
 */
static inline void init_tunnel_flow(struct flowi4 *fl4,
				    int proto,
				    __be32 daddr, __be32 saddr,
				    __be32 key, __u8 tos, int oif)
{
	memset(fl4, 0, sizeof(*fl4));
	fl4->flowi4_oif = oif;
	fl4->daddr = daddr;
	fl4->saddr = saddr;
	fl4->flowi4_tos = tos;
	fl4->flowi4_proto = proto;
	fl4->fl4_gre_key = key;
}
354 
/* Bind the tunnel to an underlying device to derive a sensible MTU and
 * needed_headroom, by routing toward the configured remote (or using
 * the configured link device).  Returns the computed MTU for @dev.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	/* Full tunnel overhead: encap/tunnel header plus outer IP header. */
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
				 iph->saddr, tunnel->parms.o_key,
				 RT_TOS(iph->tos), tunnel->parms.link);
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			/* Warm the dst cache with the route just found. */
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route found: fall back to the explicitly configured link. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}

	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	/* Clamp to 68, the IPv4 minimum MTU. */
	if (mtu < 68)
		mtu = 68;

	return mtu;
}
401 
/* Create a tunnel from @parms using the fallback device's link ops,
 * bind it for MTU/headroom, and insert it into the hash table.
 * Returns the new tunnel or an ERR_PTR() on failure.
 */
static struct ip_tunnel *ip_tunnel_create(struct net *net,
					  struct ip_tunnel_net *itn,
					  struct ip_tunnel_parm *parms)
{
	struct ip_tunnel *nt;
	struct net_device *dev;

	BUG_ON(!itn->fb_tunnel_dev);
	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
	if (IS_ERR(dev))
		return ERR_CAST(dev);

	dev->mtu = ip_tunnel_bind_dev(dev);

	nt = netdev_priv(dev);
	ip_tunnel_add(itn, nt);
	return nt;
}
420 
/* Common receive path for decapsulated IPv4 tunnel packets: validate
 * checksum/sequence flags against the tunnel's configuration, undo ECN
 * on the inner packet, bump per-cpu stats and hand the skb to the
 * tunnel's GRO cell.  Always returns 0; the skb is consumed either way
 * (delivered or dropped).
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_sw_netstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* The packet's TUNNEL_CSUM flag must agree with the tunnel's
	 * configured i_flags; any mismatch counts as a CRC error.
	 */
	if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	     ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* With TUNNEL_SEQ, enforce monotonically increasing sequence
	 * numbers; out-of-order packets are dropped as fifo errors.
	 */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	skb_reset_network_header(skb);

	/* Propagate outer-header ECN into the inner packet; err > 1
	 * means the combination is invalid and the packet must go.
	 */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	/* Scrub state when crossing netns boundaries. */
	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));

	if (tunnel->dev->type == ARPHRD_ETHER) {
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	} else {
		skb->dev = tunnel->dev;
	}

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
489 
490 static int ip_encap_hlen(struct ip_tunnel_encap *e)
491 {
492         const struct ip_tunnel_encap_ops *ops;
493         int hlen = -EINVAL;
494 
495         if (e->type == TUNNEL_ENCAP_NONE)
496                 return 0;
497 
498         if (e->type >= MAX_IPTUN_ENCAP_OPS)
499                 return -EINVAL;
500 
501         rcu_read_lock();
502         ops = rcu_dereference(iptun_encaps[e->type]);
503         if (likely(ops && ops->encap_hlen))
504                 hlen = ops->encap_hlen(e);
505         rcu_read_unlock();
506 
507         return hlen;
508 }
509 
/* Table of registered tunnel encapsulation handlers, indexed by encap
 * type.  Entries are installed/removed with the cmpxchg-based add/del
 * helpers below and read under RCU.
 */
const struct ip_tunnel_encap_ops __rcu *
		iptun_encaps[MAX_IPTUN_ENCAP_OPS] __read_mostly;
512 
/* Register @ops as the handler for encap type @num.  Returns 0 on
 * success, -ERANGE for an out-of-range slot, or -1 if the slot is
 * already taken (cmpxchg succeeds only when the slot was NULL).
 */
int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	return !cmpxchg((const struct ip_tunnel_encap_ops **)
			&iptun_encaps[num],
			NULL, ops) ? 0 : -1;
}
EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
524 
/* Unregister @ops from encap slot @num.  Returns 0 on success,
 * -ERANGE for an out-of-range slot, or -1 if the slot did not hold
 * @ops.  synchronize_net() ensures no RCU reader still uses the
 * handler once we return.
 */
int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
			    unsigned int num)
{
	int ret;

	if (num >= MAX_IPTUN_ENCAP_OPS)
		return -ERANGE;

	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
		       &iptun_encaps[num],
		       ops, NULL) == ops) ? 0 : -1;

	synchronize_net();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
542 
/* Apply encapsulation parameters @ipencap to tunnel @t and recompute
 * the cached header lengths.  Returns 0 on success or a negative errno
 * if the encap type's header length cannot be determined.
 */
int ip_tunnel_encap_setup(struct ip_tunnel *t,
			  struct ip_tunnel_encap *ipencap)
{
	int hlen;

	memset(&t->encap, 0, sizeof(t->encap));

	/* Validate the type before committing any state. */
	hlen = ip_encap_hlen(ipencap);
	if (hlen < 0)
		return hlen;

	t->encap.type = ipencap->type;
	t->encap.sport = ipencap->sport;
	t->encap.dport = ipencap->dport;
	t->encap.flags = ipencap->flags;

	t->encap_hlen = hlen;
	t->hlen = t->encap_hlen + t->tun_hlen;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
565 
/* Build the configured encapsulation header on @skb via the registered
 * handler, possibly rewriting *protocol and the flow key.  Returns 0
 * when no encapsulation is configured, a negative errno on failure, or
 * the handler's result.
 */
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
		    u8 *protocol, struct flowi4 *fl4)
{
	const struct ip_tunnel_encap_ops *ops;
	int ret = -EINVAL;

	if (t->encap.type == TUNNEL_ENCAP_NONE)
		return 0;

	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
		return -EINVAL;

	rcu_read_lock();
	ops = rcu_dereference(iptun_encaps[t->encap.type]);
	if (likely(ops && ops->build_header))
		ret = ops->build_header(skb, &t->encap, protocol, fl4);
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL(ip_tunnel_encap);
587 
/* Check whether @skb fits through the tunnel's path MTU, propagating
 * the MTU to the inner route and emitting ICMP/ICMPv6 "too big" errors
 * when fragmentation is not allowed.  Returns 0 if the packet may be
 * sent, -E2BIG if the sender has been notified to lower its MTU.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			    struct rtable *rt, __be16 df,
			    const struct iphdr *inner_iph)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	/* With DF set the usable MTU is the outer path MTU minus the
	 * tunnel overhead; otherwise use the inner route's (or device's)
	 * MTU directly.
	 */
	if (df)
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		/* Inner IPv4 with DF set and too big: tell the sender. */
		if (!skb_is_gso(skb) &&
		    (inner_iph->frag_off & htons(IP_DF)) &&
		    mtu < pkt_size) {
			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on host routes (or when the
		 * tunnel has a fixed unicast endpoint).
		 */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
					mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
637 
/* Common transmit path for IPv4 tunnels: resolve the outer
 * destination (including the NBMA case where it comes from the inner
 * packet), pick TOS/TTL/DF for the outer header, route the packet
 * (using the per-cpu dst cache when the tunnel endpoint is fixed),
 * enforce PMTU and hand the packet to iptunnel_xmit().  The skb is
 * always consumed.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params, u8 protocol)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct flowi4 fl4;
	u8     tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;
	int err;
	bool connected;			/* fixed endpoint => dst cache usable */

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
	connected = (tunnel->parms.iph.daddr != 0);

	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: derive the outer destination from the
		 * inner packet's next hop / destination.
		 */

		if (!skb_dst(skb)) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (!neigh)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			/* Only v4-compatible v6 addresses carry an
			 * embedded IPv4 destination we can use.
			 */
			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;

		/* Per-packet destination: the dst cache can't be used. */
		connected = false;
	}

	tos = tnl_params->tos;
	if (tos & 0x1) {
		/* Low bit set means "inherit TOS from the inner packet". */
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP)) {
			tos = inner_iph->tos;
			connected = false;
		} else if (skb->protocol == htons(ETH_P_IPV6)) {
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
			connected = false;
		}
	}

	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);

	if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
		goto tx_error;

	/* Try the per-cpu cached route first for fixed endpoints. */
	rt = connected ? tunnel_rtable_get(tunnel, 0, &fl4.saddr) : NULL;

	if (!rt) {
		rt = ip_route_output_key(tunnel->net, &fl4);

		if (IS_ERR(rt)) {
			dev->stats.tx_carrier_errors++;
			goto tx_error;
		}
		if (connected)
			tunnel_dst_set(tunnel, &rt->dst, fl4.saddr);
	}

	/* Routing back through ourselves would loop forever. */
	if (rt->dst.dev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}

	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Replay a recent ICMP error back to local senders while the
	 * error budget lasts.
	 */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		/* TTL 0 means "inherit from the inner packet". */
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
			+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
	if (max_headroom > dev->needed_headroom)
		dev->needed_headroom = max_headroom;

	if (skb_cow_head(skb, dev->needed_headroom)) {
		ip_rt_put(rt);
		dev->stats.tx_dropped++;
		kfree_skb(skb);
		return;
	}

	err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol,
			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));
	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);

	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
802 
/* Apply new parameters @p to an existing tunnel @t: rehash it (the
 * bucket depends on key/daddr), refresh addresses/keys/header fields,
 * rebind if the underlying link changed, and flush the dst cache.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	/* Unhash first: changing daddr/i_key may move the bucket. */
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Non-Ethernet tunnels mirror the endpoints into the
		 * device's hardware/broadcast addresses.
		 */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	/* All cached routes are stale after a parameter change. */
	ip_tunnel_dst_reset_all(t);
	netdev_state_change(dev);
}
835 
/* ip_tunnel_ioctl - common handler for the tunnel configuration ioctls.
 * @dev: device the ioctl was issued on; when it is the per-netns fallback
 *       device, the target tunnel is looked up from @p instead
 * @p:   ip_tunnel_parm copied in from userspace (copied back out on
 *       SIOCGETTUNNEL)
 * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
 *
 * Returns 0 on success or a negative errno.
 * NOTE(review): appears to rely on RTNL being held by the caller
 * (ip_tunnel_update()/unregister_netdevice() below) -- confirm at call sites.
 */
836 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
837 {
838         int err = 0;
839         struct ip_tunnel *t = netdev_priv(dev);
840         struct net *net = t->net;
841         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
842 
843         BUG_ON(!itn->fb_tunnel_dev);
844         switch (cmd) {
845         case SIOCGETTUNNEL:
                /* On the fallback device, look the tunnel up by the given
                 * parameters; if none matches, report the fallback device's
                 * own parameters.
                 */
846                 if (dev == itn->fb_tunnel_dev) {
847                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
848                         if (!t)
849                                 t = netdev_priv(dev);
850                 }
851                 memcpy(p, &t->parms, sizeof(*p));
852                 break;
853 
854         case SIOCADDTUNNEL:
855         case SIOCCHGTUNNEL:
856                 err = -EPERM;
857                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
858                         goto done;
                /* An explicit TTL forces DF on the outer IP header. */
859                 if (p->iph.ttl)
860                         p->iph.frag_off |= htons(IP_DF);
                /* Except for VTI tunnels, a key only has meaning when the
                 * corresponding TUNNEL_KEY flag is set; clear stale keys.
                 */
861                 if (!(p->i_flags & VTI_ISVTI)) {
862                         if (!(p->i_flags & TUNNEL_KEY))
863                                 p->i_key = 0;
864                         if (!(p->o_flags & TUNNEL_KEY))
865                                 p->o_key = 0;
866                 }
867 
868                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
869 
870                 if (cmd == SIOCADDTUNNEL) {
                        /* ADD: create a fresh tunnel, or fail with -EEXIST
                         * if one with these parameters already exists.
                         */
871                         if (!t) {
872                                 t = ip_tunnel_create(net, itn, p);
873                                 err = PTR_ERR_OR_ZERO(t);
874                                 break;
875                         }
876 
877                         err = -EEXIST;
878                         break;
879                 }
880                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
881                         if (t) {
                                /* The new parameters match an existing
                                 * tunnel; that tunnel may only be @dev
                                 * itself.
                                 */
882                                 if (t->dev != dev) {
883                                         err = -EEXIST;
884                                         break;
885                                 }
886                         } else {
887                                 unsigned int nflags = 0;
888 
889                                 if (ipv4_is_multicast(p->iph.daddr))
890                                         nflags = IFF_BROADCAST;
891                                 else if (p->iph.daddr)
892                                         nflags = IFF_POINTOPOINT;
893 
                                /* The new destination must not change the
                                 * device's broadcast/point-to-point mode.
                                 */
894                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
895                                         err = -EINVAL;
896                                         break;
897                                 }
898 
899                                 t = netdev_priv(dev);
900                         }
901                 }
902 
903                 if (t) {
904                         err = 0;
905                         ip_tunnel_update(itn, t, dev, p, true);
906                 } else {
907                         err = -ENOENT;
908                 }
909                 break;
910 
911         case SIOCDELTUNNEL:
912                 err = -EPERM;
913                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
914                         goto done;
915 
916                 if (dev == itn->fb_tunnel_dev) {
                        /* On the fallback device, delete the tunnel that
                         * matches @p -- but never the fallback device itself.
                         */
917                         err = -ENOENT;
918                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
919                         if (!t)
920                                 goto done;
921                         err = -EPERM;
922                         if (t == netdev_priv(itn->fb_tunnel_dev))
923                                 goto done;
924                         dev = t->dev;
925                 }
926                 unregister_netdevice(dev);
927                 err = 0;
928                 break;
929 
930         default:
931                 err = -EINVAL;
932         }
933 
934 done:
935         return err;
936 }
937 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
938 
939 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
940 {
941         struct ip_tunnel *tunnel = netdev_priv(dev);
942         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
943 
944         if (new_mtu < 68 ||
945             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
946                 return -EINVAL;
947         dev->mtu = new_mtu;
948         return 0;
949 }
950 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
951 
/* net_device destructor for ip tunnels: releases the per-cpu state set up
 * in ip_tunnel_init() (GRO cells, dst cache, tstats) and then the netdev
 * itself.  free_netdev() must come last -- the ip_tunnel private area
 * accessed here lives inside @dev's allocation.
 */
952 static void ip_tunnel_dev_free(struct net_device *dev)
953 {
954         struct ip_tunnel *tunnel = netdev_priv(dev);
955 
956         gro_cells_destroy(&tunnel->gro_cells);
957         free_percpu(tunnel->dst_cache);
958         free_percpu(dev->tstats);
959         free_netdev(dev);
960 }
961 
962 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
963 {
964         struct ip_tunnel *tunnel = netdev_priv(dev);
965         struct ip_tunnel_net *itn;
966 
967         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
968 
969         if (itn->fb_tunnel_dev != dev) {
970                 ip_tunnel_del(netdev_priv(dev));
971                 unregister_netdevice_queue(dev, head);
972         }
973 }
974 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
975 
976 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
977 {
978         struct ip_tunnel *tunnel = netdev_priv(dev);
979 
980         return tunnel->net;
981 }
982 EXPORT_SYMBOL(ip_tunnel_get_link_net);
983 
984 int ip_tunnel_get_iflink(const struct net_device *dev)
985 {
986         struct ip_tunnel *tunnel = netdev_priv(dev);
987 
988         return tunnel->parms.link;
989 }
990 EXPORT_SYMBOL(ip_tunnel_get_iflink);
991 
/* ip_tunnel_init_net - per-netns initialization for one tunnel type.
 * @net:           the namespace being set up
 * @ip_tnl_net_id: pernet subsystem id of this tunnel type
 * @ops:           rtnl_link_ops of the tunnel type, or NULL when the type
 *                 has no fallback device
 * @devname:       name to give the fallback device (may be NULL)
 *
 * Initializes the per-netns tunnel hash table and, when @ops is given,
 * creates the fallback device under RTNL.
 * Returns 0 on success or the errno from fallback device creation.
 */
992 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
993                                   struct rtnl_link_ops *ops, char *devname)
994 {
995         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
996         struct ip_tunnel_parm parms;
997         unsigned int i;
998 
999         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1000                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1001 
                /* No rtnl_link_ops: this tunnel type has no fallback device. */
1002         if (!ops) {
1003                 itn->fb_tunnel_dev = NULL;
1004                 return 0;
1005         }
1006 
1007         memset(&parms, 0, sizeof(parms));
1008         if (devname)
1009                 strlcpy(parms.name, devname, IFNAMSIZ);
1010 
1011         rtnl_lock();
1012         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1013         /* FB netdevice is special: we have one, and only one per netns.
1014          * Allowing to move it to another netns is clearly unsafe.
1015          */
1016         if (!IS_ERR(itn->fb_tunnel_dev)) {
1017                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1018                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1019                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1020         }
1021         rtnl_unlock();
1022 
                /* Converts an ERR_PTR from __ip_tunnel_create() into errno. */
1023         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1024 }
1025 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1026 
1027 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1028                               struct rtnl_link_ops *ops)
1029 {
1030         struct net *net = dev_net(itn->fb_tunnel_dev);
1031         struct net_device *dev, *aux;
1032         int h;
1033 
1034         for_each_netdev_safe(net, dev, aux)
1035                 if (dev->rtnl_link_ops == ops)
1036                         unregister_netdevice_queue(dev, head);
1037 
1038         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1039                 struct ip_tunnel *t;
1040                 struct hlist_node *n;
1041                 struct hlist_head *thead = &itn->tunnels[h];
1042 
1043                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1044                         /* If dev is in the same netns, it has already
1045                          * been added to the list by the previous loop.
1046                          */
1047                         if (!net_eq(dev_net(t->dev), net))
1048                                 unregister_netdevice_queue(t->dev, head);
1049         }
1050 }
1051 
1052 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1053 {
1054         LIST_HEAD(list);
1055 
1056         rtnl_lock();
1057         ip_tunnel_destroy(itn, &list, ops);
1058         unregister_netdevice_many(&list);
1059         rtnl_unlock();
1060 }
1061 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1062 
1063 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1064                       struct ip_tunnel_parm *p)
1065 {
1066         struct ip_tunnel *nt;
1067         struct net *net = dev_net(dev);
1068         struct ip_tunnel_net *itn;
1069         int mtu;
1070         int err;
1071 
1072         nt = netdev_priv(dev);
1073         itn = net_generic(net, nt->ip_tnl_net_id);
1074 
1075         if (ip_tunnel_find(itn, p, dev->type))
1076                 return -EEXIST;
1077 
1078         nt->net = net;
1079         nt->parms = *p;
1080         err = register_netdevice(dev);
1081         if (err)
1082                 goto out;
1083 
1084         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1085                 eth_hw_addr_random(dev);
1086 
1087         mtu = ip_tunnel_bind_dev(dev);
1088         if (!tb[IFLA_MTU])
1089                 dev->mtu = mtu;
1090 
1091         ip_tunnel_add(itn, nt);
1092 
1093 out:
1094         return err;
1095 }
1096 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1097 
1098 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1099                          struct ip_tunnel_parm *p)
1100 {
1101         struct ip_tunnel *t;
1102         struct ip_tunnel *tunnel = netdev_priv(dev);
1103         struct net *net = tunnel->net;
1104         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1105 
1106         if (dev == itn->fb_tunnel_dev)
1107                 return -EINVAL;
1108 
1109         t = ip_tunnel_find(itn, p, dev->type);
1110 
1111         if (t) {
1112                 if (t->dev != dev)
1113                         return -EEXIST;
1114         } else {
1115                 t = tunnel;
1116 
1117                 if (dev->type != ARPHRD_ETHER) {
1118                         unsigned int nflags = 0;
1119 
1120                         if (ipv4_is_multicast(p->iph.daddr))
1121                                 nflags = IFF_BROADCAST;
1122                         else if (p->iph.daddr)
1123                                 nflags = IFF_POINTOPOINT;
1124 
1125                         if ((dev->flags ^ nflags) &
1126                             (IFF_POINTOPOINT | IFF_BROADCAST))
1127                                 return -EINVAL;
1128                 }
1129         }
1130 
1131         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1132         return 0;
1133 }
1134 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1135 
1136 int ip_tunnel_init(struct net_device *dev)
1137 {
1138         struct ip_tunnel *tunnel = netdev_priv(dev);
1139         struct iphdr *iph = &tunnel->parms.iph;
1140         int err;
1141 
1142         dev->destructor = ip_tunnel_dev_free;
1143         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1144         if (!dev->tstats)
1145                 return -ENOMEM;
1146 
1147         tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
1148         if (!tunnel->dst_cache) {
1149                 free_percpu(dev->tstats);
1150                 return -ENOMEM;
1151         }
1152 
1153         err = gro_cells_init(&tunnel->gro_cells, dev);
1154         if (err) {
1155                 free_percpu(tunnel->dst_cache);
1156                 free_percpu(dev->tstats);
1157                 return err;
1158         }
1159 
1160         tunnel->dev = dev;
1161         tunnel->net = dev_net(dev);
1162         strcpy(tunnel->parms.name, dev->name);
1163         iph->version            = 4;
1164         iph->ihl                = 5;
1165 
1166         return 0;
1167 }
1168 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1169 
1170 void ip_tunnel_uninit(struct net_device *dev)
1171 {
1172         struct ip_tunnel *tunnel = netdev_priv(dev);
1173         struct net *net = tunnel->net;
1174         struct ip_tunnel_net *itn;
1175 
1176         itn = net_generic(net, tunnel->ip_tnl_net_id);
1177         /* fb_tunnel_dev will be unregisted in net-exit call. */
1178         if (itn->fb_tunnel_dev != dev)
1179                 ip_tunnel_del(netdev_priv(dev));
1180 
1181         ip_tunnel_dst_reset_all(tunnel);
1182 }
1183 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1184 
1185 /* Do least required initialization, rest of init is done in tunnel_init call */
1186 void ip_tunnel_setup(struct net_device *dev, int net_id)
1187 {
1188         struct ip_tunnel *tunnel = netdev_priv(dev);
1189         tunnel->ip_tnl_net_id = net_id;
1190 }
1191 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1192 
1193 MODULE_LICENSE("GPL");
1194 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp