~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_tunnel.c

Version: ~ [ linux-5.13-rc1 ] ~ [ linux-5.12.2 ] ~ [ linux-5.11.19 ] ~ [ linux-5.10.35 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.117 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.190 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.232 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.268 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.268 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2013 Nicira, Inc.
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of version 2 of the GNU General Public
  6  * License as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope that it will be useful, but
  9  * WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11  * General Public License for more details.
 12  *
 13  * You should have received a copy of the GNU General Public License
 14  * along with this program; if not, write to the Free Software
 15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 16  * 02110-1301, USA
 17  */
 18 
 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20 
 21 #include <linux/capability.h>
 22 #include <linux/module.h>
 23 #include <linux/types.h>
 24 #include <linux/kernel.h>
 25 #include <linux/slab.h>
 26 #include <linux/uaccess.h>
 27 #include <linux/skbuff.h>
 28 #include <linux/netdevice.h>
 29 #include <linux/in.h>
 30 #include <linux/tcp.h>
 31 #include <linux/udp.h>
 32 #include <linux/if_arp.h>
 33 #include <linux/init.h>
 34 #include <linux/in6.h>
 35 #include <linux/inetdevice.h>
 36 #include <linux/igmp.h>
 37 #include <linux/netfilter_ipv4.h>
 38 #include <linux/etherdevice.h>
 39 #include <linux/if_ether.h>
 40 #include <linux/if_vlan.h>
 41 #include <linux/rculist.h>
 42 #include <linux/err.h>
 43 
 44 #include <net/sock.h>
 45 #include <net/ip.h>
 46 #include <net/icmp.h>
 47 #include <net/protocol.h>
 48 #include <net/ip_tunnels.h>
 49 #include <net/arp.h>
 50 #include <net/checksum.h>
 51 #include <net/dsfield.h>
 52 #include <net/inet_ecn.h>
 53 #include <net/xfrm.h>
 54 #include <net/net_namespace.h>
 55 #include <net/netns/generic.h>
 56 #include <net/rtnetlink.h>
 57 #include <net/udp.h>
 58 
 59 #if IS_ENABLED(CONFIG_IPV6)
 60 #include <net/ipv6.h>
 61 #include <net/ip6_fib.h>
 62 #include <net/ip6_route.h>
 63 #endif
 64
    /*
     * Map a tunnel key and a remote address to a hash-table index: the two
     * values are XORed together and folded down to IP_TNL_HASH_BITS bits
     * with hash_32().
     */
 65 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 66 {
 67         return hash_32((__force u32)key ^ (__force u32)remote,
 68                          IP_TNL_HASH_BITS);
 69 }
 70
    /*
     * Decide whether a packet's key agrees with a tunnel's configuration.
     *
     * @p:     parameters of the tunnel being tested
     * @flags: flags describing the packet (only TUNNEL_KEY is examined)
     * @key:   key carried by the packet, meaningful when TUNNEL_KEY is set
     *
     * A tunnel configured with TUNNEL_KEY matches only packets that carry
     * a key equal to p->i_key; a tunnel without TUNNEL_KEY matches only
     * packets that carry no key at all.
     */
 71 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
 72                                 __be16 flags, __be32 key)
 73 {
 74         if (p->i_flags & TUNNEL_KEY) {
 75                 if (flags & TUNNEL_KEY)
 76                         return key == p->i_key;
 77                 else
 78                         /* key expected, none present */
 79                         return false;
 80         } else
 81                 return !(flags & TUNNEL_KEY);
 82 }
 83 
 84 /* Fallback tunnel: no source, no destination, no key, no options
 85 
 86    Tunnel hash table:
 87    We require an exact key match, i.e. if a key is present in the packet
 88    it will match only a tunnel with the same key; if it is not present,
 89    it will match only a keyless tunnel.
 90 
 91    All keyless packets that do not match a configured keyless tunnel
 92    will match the fallback tunnel.
 93    Given src, dst and key, find the appropriate tunnel for input.
    
       Lookup order (only devices that are IFF_UP are considered):
         1. tunnels whose local and remote addresses both match;
         2. tunnels whose remote matches and whose local is a wildcard (0);
         3. tunnels in the (key, 0) bucket whose local matches with a
            wildcard remote, or whose remote equals a multicast "local";
         4. unless TUNNEL_NO_KEY is set, tunnels with wildcard local and
            remote whose i_key equals the key;
         5. the collect_md tunnel, then the fallback device.
       In passes 1-4 a tunnel bound to the requested link is returned at
       once; otherwise a candidate on another link is remembered and used
       only if no pass produced a tunnel on the requested link.
    
       Returns the selected tunnel or NULL.  The lists are walked with the
       _rcu iterators, so the caller is expected to be in an RCU read-side
       section.
 94 */
 95 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 96                                    int link, __be16 flags,
 97                                    __be32 remote, __be32 local,
 98                                    __be32 key)
 99 {
100         unsigned int hash;
101         struct ip_tunnel *t, *cand = NULL;
102         struct hlist_head *head;
103 
104         hash = ip_tunnel_hash(key, remote);
105         head = &itn->tunnels[hash];
106 
            /* Pass 1: both endpoints configured and matching. */
107         hlist_for_each_entry_rcu(t, head, hash_node) {
108                 if (local != t->parms.iph.saddr ||
109                     remote != t->parms.iph.daddr ||
110                     !(t->dev->flags & IFF_UP))
111                         continue;
112 
113                 if (!ip_tunnel_key_match(&t->parms, flags, key))
114                         continue;
115 
116                 if (t->parms.link == link)
117                         return t;
118                 else
119                         cand = t;
120         }
121 
            /* Pass 2: remote matches, local address left unspecified. */
122         hlist_for_each_entry_rcu(t, head, hash_node) {
123                 if (remote != t->parms.iph.daddr ||
124                     t->parms.iph.saddr != 0 ||
125                     !(t->dev->flags & IFF_UP))
126                         continue;
127 
128                 if (!ip_tunnel_key_match(&t->parms, flags, key))
129                         continue;
130 
131                 if (t->parms.link == link)
132                         return t;
133                 else if (!cand)
134                         cand = t;
135         }
136 
            /* Tunnels without a unicast remote are hashed with remote == 0. */
137         hash = ip_tunnel_hash(key, 0);
138         head = &itn->tunnels[hash];
139 
            /* Pass 3: local-only tunnels, or multicast destination tunnels. */
140         hlist_for_each_entry_rcu(t, head, hash_node) {
141                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
142                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
143                         continue;
144 
145                 if (!(t->dev->flags & IFF_UP))
146                         continue;
147 
148                 if (!ip_tunnel_key_match(&t->parms, flags, key))
149                         continue;
150 
151                 if (t->parms.link == link)
152                         return t;
153                 else if (!cand)
154                         cand = t;
155         }
156 
157         if (flags & TUNNEL_NO_KEY)
158                 goto skip_key_lookup;
159 
            /* Pass 4: fully wildcard tunnels selected by key alone. */
160         hlist_for_each_entry_rcu(t, head, hash_node) {
161                 if (t->parms.i_key != key ||
162                     t->parms.iph.saddr != 0 ||
163                     t->parms.iph.daddr != 0 ||
164                     !(t->dev->flags & IFF_UP))
165                         continue;
166 
167                 if (t->parms.link == link)
168                         return t;
169                 else if (!cand)
170                         cand = t;
171         }
172 
173 skip_key_lookup:
174         if (cand)
175                 return cand;
176 
            /*
             * Like every other tunnel considered above (and the fallback
             * device below), the collect_md tunnel must only receive
             * traffic while its device is up.
             */
177         t = rcu_dereference(itn->collect_md_tun);
178         if (t && t->dev->flags & IFF_UP)
179                 return t;
180 
181         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
182                 return netdev_priv(itn->fb_tunnel_dev);
183 
184         return NULL;
185 }
186 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
187
    /*
     * Select the hash bucket in which a tunnel with the given parameters
     * is stored.
     *
     * The remote part of the hash is the destination address, except that
     * an unset or multicast destination is hashed as 0.  The key part is
     * parms->i_key, forced to 0 for a VTI tunnel (VTI_ISVTI) that does not
     * have TUNNEL_KEY set.
     */
188 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
189                                     struct ip_tunnel_parm *parms)
190 {
191         unsigned int h;
192         __be32 remote;
193         __be32 i_key = parms->i_key;
194 
195         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
196                 remote = parms->iph.daddr;
197         else
198                 remote = 0;
199 
200         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
201                 i_key = 0;
202 
203         h = ip_tunnel_hash(i_key, remote);
204         return &itn->tunnels[h];
205 }
206
    /*
     * Insert tunnel @t at the head of its hash bucket in @itn.  A tunnel
     * with collect_md set is additionally published as itn->collect_md_tun.
     */
207 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
208 {
209         struct hlist_head *head = ip_bucket(itn, &t->parms);
210 
211         if (t->collect_md)
212                 rcu_assign_pointer(itn->collect_md_tun, t);
213         hlist_add_head_rcu(&t->hash_node, head);
214 }
215
    /*
     * Unhash tunnel @t.  If it is a collect_md tunnel, itn->collect_md_tun
     * is cleared first so that lookups stop handing it out.
     */
216 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
217 {
218         if (t->collect_md)
219                 rcu_assign_pointer(itn->collect_md_tun, NULL);
220         hlist_del_init_rcu(&t->hash_node);
221 }
222
    /*
     * Find an already configured tunnel that exactly matches @parms.
     *
     * Only the bucket chosen by ip_bucket() is searched.  A tunnel matches
     * when its local address, remote address and link equal those in
     * @parms, its device type equals @type, and its key configuration
     * agrees with parms->i_flags / parms->i_key.
     *
     * Returns the matching tunnel.
     * NOTE(review): the "not found" result relies on the list iterator
     * leaving @t NULL once the bucket is exhausted - callers test for NULL.
     */
223 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
224                                         struct ip_tunnel_parm *parms,
225                                         int type)
226 {
227         __be32 remote = parms->iph.daddr;
228         __be32 local = parms->iph.saddr;
229         __be32 key = parms->i_key;
230         __be16 flags = parms->i_flags;
231         int link = parms->link;
232         struct ip_tunnel *t = NULL;
233         struct hlist_head *head = ip_bucket(itn, parms);
234 
235         hlist_for_each_entry_rcu(t, head, hash_node) {
236                 if (local == t->parms.iph.saddr &&
237                     remote == t->parms.iph.daddr &&
238                     link == t->parms.link &&
239                     type == t->dev->type &&
240                     ip_tunnel_key_match(&t->parms, flags, key))
241                         break;
242         }
243         return t;
244 }
245
    /*
     * Allocate and register a tunnel net_device in @net.
     *
     * @net:   namespace the device is created in
     * @ops:   rtnl link ops supplying the private size, setup callback and
     *         the "kind" used to build a default name
     * @parms: tunnel parameters; copied into the new tunnel
     *
     * The device is named parms->name when that is non-empty, otherwise
     * "<kind>%d".  Must be called with RTNL held (ASSERT_RTNL).
     *
     * Returns the registered device, or an ERR_PTR():
     *   -EINVAL  parms->name is not a valid interface name
     *   -E2BIG   ops->kind leaves no room for the "%d" suffix
     *   -ENOMEM  device allocation failed
     *   or the error returned by register_netdevice().
     */
246 static struct net_device *__ip_tunnel_create(struct net *net,
247                                              const struct rtnl_link_ops *ops,
248                                              struct ip_tunnel_parm *parms)
249 {
250         int err;
251         struct ip_tunnel *tunnel;
252         struct net_device *dev;
253         char name[IFNAMSIZ];
254 
255         if (parms->name[0]) {
                    /*
                     * Validate before copying: strlcpy() measures the whole
                     * source string, so a name that fills the IFNAMSIZ
                     * buffer without a terminating NUL would be read out of
                     * bounds.  dev_valid_name() rejects such names, along
                     * with the other names register_netdevice() refuses.
                     * NOTE(review): presumably parms->name can arrive
                     * unterminated from an ioctl caller - confirm.
                     */
                    if (!dev_valid_name(parms->name)) {
                            err = -EINVAL;
                            goto failed;
                    }
256                 strlcpy(name, parms->name, IFNAMSIZ);
257         } else {
258                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
259                         err = -E2BIG;
260                         goto failed;
261                 }
262                 strlcpy(name, ops->kind, IFNAMSIZ);
263                 strncat(name, "%d", 2);
264         }
265 
266         ASSERT_RTNL();
267         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
268         if (!dev) {
269                 err = -ENOMEM;
270                 goto failed;
271         }
272         dev_net_set(dev, net);
273 
274         dev->rtnl_link_ops = ops;
275 
276         tunnel = netdev_priv(dev);
277         tunnel->parms = *parms;
278         tunnel->net = net;
279 
280         err = register_netdevice(dev);
281         if (err)
282                 goto failed_free;
283 
284         return dev;
285 
286 failed_free:
287         free_netdev(dev);
288 failed:
289         return ERR_PTR(err);
290 }
291
    /*
     * Zero @fl4 and fill in the output interface, addresses, TOS, protocol
     * and GRE key used for the tunnel's route lookup.
     */
292 static inline void init_tunnel_flow(struct flowi4 *fl4,
293                                     int proto,
294                                     __be32 daddr, __be32 saddr,
295                                     __be32 key, __u8 tos, int oif)
296 {
297         memset(fl4, 0, sizeof(*fl4));
298         fl4->flowi4_oif = oif;
299         fl4->daddr = daddr;
300         fl4->saddr = saddr;
301         fl4->flowi4_tos = tos;
302         fl4->flowi4_proto = proto;
303         fl4->fl4_gre_key = key;
304 }
305
    /*
     * Guess the underlying device the tunnel will transmit through and
     * derive the tunnel's headroom and MTU from it.
     *
     * When a remote address is configured, a route lookup picks the
     * underlying device, non-Ethernet tunnels are flagged IFF_POINTOPOINT
     * and the cached route is reset.  If no device was found that way, the
     * configured link index is tried.  Without any underlying device the
     * defaults LL_MAX_HEADER and ETH_DATA_LEN are used.
     *
     * Side effect: sets dev->needed_headroom.
     * Returns the suggested MTU, never less than 68; the caller decides
     * whether to apply it.
     */
306 static int ip_tunnel_bind_dev(struct net_device *dev)
307 {
308         struct net_device *tdev = NULL;
309         struct ip_tunnel *tunnel = netdev_priv(dev);
310         const struct iphdr *iph;
311         int hlen = LL_MAX_HEADER;
312         int mtu = ETH_DATA_LEN;
            /* Bytes the tunnel adds: outer IPv4 header plus tunnel header. */
313         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
314 
315         iph = &tunnel->parms.iph;
316 
317         /* Guess output device to choose reasonable mtu and needed_headroom */
318         if (iph->daddr) {
319                 struct flowi4 fl4;
320                 struct rtable *rt;
321 
322                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
323                                  iph->saddr, tunnel->parms.o_key,
324                                  RT_TOS(iph->tos), tunnel->parms.link);
325                 rt = ip_route_output_key(tunnel->net, &fl4);
326 
327                 if (!IS_ERR(rt)) {
328                         tdev = rt->dst.dev;
329                         ip_rt_put(rt);
330                 }
331                 if (dev->type != ARPHRD_ETHER)
332                         dev->flags |= IFF_POINTOPOINT;
333 
334                 dst_cache_reset(&tunnel->dst_cache);
335         }
336 
337         if (!tdev && tunnel->parms.link)
338                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
339 
340         if (tdev) {
341                 hlen = tdev->hard_header_len + tdev->needed_headroom;
342                 mtu = tdev->mtu;
343         }
344 
345         dev->needed_headroom = t_hlen + hlen;
346         mtu -= (dev->hard_header_len + t_hlen);
347 
348         if (mtu < 68)
349                 mtu = 68;
350 
351         return mtu;
352 }
353
    /*
     * Create a new tunnel described by @parms, reusing the rtnl link ops of
     * the fallback device, set its MTU from ip_tunnel_bind_dev() and insert
     * it into @itn's hash table.
     *
     * BUGs if @itn has no fallback device.
     * Returns the new tunnel, or the ERR_PTR() from __ip_tunnel_create().
     */
354 static struct ip_tunnel *ip_tunnel_create(struct net *net,
355                                           struct ip_tunnel_net *itn,
356                                           struct ip_tunnel_parm *parms)
357 {
358         struct ip_tunnel *nt;
359         struct net_device *dev;
360 
361         BUG_ON(!itn->fb_tunnel_dev);
362         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
363         if (IS_ERR(dev))
364                 return ERR_CAST(dev);
365 
366         dev->mtu = ip_tunnel_bind_dev(dev);
367 
368         nt = netdev_priv(dev);
369         ip_tunnel_add(itn, nt);
370         return nt;
371 }
372
    /*
     * Common receive path for a decapsulated packet.
     *
     * @tunnel:        tunnel the packet was matched to
     * @skb:           packet; always consumed (delivered or freed)
     * @tpi:           flags and sequence number parsed from the tunnel header
     * @tun_dst:       optional metadata dst; attached to the skb on
     *                 delivery, released if the packet is dropped
     * @log_ecn_error: log (rate limited) when ECN decapsulation reports a
     *                 problem
     *
     * The packet is dropped, with the matching rx error counters bumped,
     * when its checksum flag disagrees with the tunnel configuration, when
     * a required sequence number is missing or older than expected, or when
     * IP_ECN_decapsulate() returns a value greater than 1.  Otherwise the
     * per-cpu rx statistics are updated, the skb is scrubbed and handed to
     * the tunnel's GRO cells.
     *
     * Always returns 0.
     */
373 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
374                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
375                   bool log_ecn_error)
376 {
377         struct pcpu_sw_netstats *tstats;
378         const struct iphdr *iph = ip_hdr(skb);
379         int err;
380 
381 #ifdef CONFIG_NET_IPGRE_BROADCAST
382         if (ipv4_is_multicast(iph->daddr)) {
383                 tunnel->dev->stats.multicast++;
384                 skb->pkt_type = PACKET_BROADCAST;
385         }
386 #endif
387 
            /* The packet and the tunnel must agree on whether a checksum is used. */
388         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
389              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
390                 tunnel->dev->stats.rx_crc_errors++;
391                 tunnel->dev->stats.rx_errors++;
392                 goto drop;
393         }
394 
395         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
                    /* The signed difference keeps the comparison valid across wrap. */
396                 if (!(tpi->flags&TUNNEL_SEQ) ||
397                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
398                         tunnel->dev->stats.rx_fifo_errors++;
399                         tunnel->dev->stats.rx_errors++;
400                         goto drop;
401                 }
402                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
403         }
404 
405         skb_reset_network_header(skb);
406 
407         err = IP_ECN_decapsulate(iph, skb);
408         if (unlikely(err)) {
409                 if (log_ecn_error)
410                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
411                                         &iph->saddr, iph->tos);
412                 if (err > 1) {
413                         ++tunnel->dev->stats.rx_frame_errors;
414                         ++tunnel->dev->stats.rx_errors;
415                         goto drop;
416                 }
417         }
418 
419         tstats = this_cpu_ptr(tunnel->dev->tstats);
420         u64_stats_update_begin(&tstats->syncp);
421         tstats->rx_packets++;
422         tstats->rx_bytes += skb->len;
423         u64_stats_update_end(&tstats->syncp);
424 
425         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
426 
427         if (tunnel->dev->type == ARPHRD_ETHER) {
428                 skb->protocol = eth_type_trans(skb, tunnel->dev);
429                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
430         } else {
431                 skb->dev = tunnel->dev;
432         }
433 
434         if (tun_dst)
435                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
436 
437         gro_cells_receive(&tunnel->gro_cells, skb);
438         return 0;
439 
440 drop:
            /*
             * On delivery the reference on tun_dst is handed to the skb by
             * skb_dst_set(); on a drop nothing else would ever release it,
             * so drop it here instead of leaking the metadata dst.
             * NOTE(review): assumes callers pass in a reference they do not
             * release themselves - confirm against the callers.
             */
            if (tun_dst)
                    dst_release((struct dst_entry *)tun_dst);
441         kfree_skb(skb);
442         return 0;
443 }
444 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
445
    /*
     * Register encapsulation ops in slot @num of iptun_encaps.
     *
     * The slot is claimed with cmpxchg(), so it is only taken when it is
     * currently NULL.
     *
     * Returns 0 on success, -ERANGE when @num is not below
     * MAX_IPTUN_ENCAP_OPS, or -1 when the slot is already occupied.
     */
446 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
447                             unsigned int num)
448 {
449         if (num >= MAX_IPTUN_ENCAP_OPS)
450                 return -ERANGE;
451 
452         return !cmpxchg((const struct ip_tunnel_encap_ops **)
453                         &iptun_encaps[num],
454                         NULL, ops) ? 0 : -1;
455 }
456 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
457
    /*
     * Unregister encapsulation ops from slot @num of iptun_encaps.
     *
     * The slot is cleared with cmpxchg() only if it currently holds @ops.
     * synchronize_net() is called before returning, whether or not the
     * slot was cleared.
     *
     * Returns 0 on success, -ERANGE when @num is not below
     * MAX_IPTUN_ENCAP_OPS, or -1 when the slot did not hold @ops.
     */
458 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
459                             unsigned int num)
460 {
461         int ret;
462 
463         if (num >= MAX_IPTUN_ENCAP_OPS)
464                 return -ERANGE;
465 
466         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
467                        &iptun_encaps[num],
468                        ops, NULL) == ops) ? 0 : -1;
469 
470         synchronize_net();
471 
472         return ret;
473 }
474 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
475
    /*
     * Configure the extra encapsulation of tunnel @t from @ipencap.
     *
     * t->encap is cleared first, so a failed call leaves the tunnel with
     * zeroed encapsulation settings.  On success the type, ports and flags
     * are copied and t->encap_hlen / t->hlen are recomputed.
     *
     * Returns 0, or the negative value returned by ip_encap_hlen().
     */
476 int ip_tunnel_encap_setup(struct ip_tunnel *t,
477                           struct ip_tunnel_encap *ipencap)
478 {
479         int hlen;
480 
481         memset(&t->encap, 0, sizeof(t->encap));
482 
483         hlen = ip_encap_hlen(ipencap);
484         if (hlen < 0)
485                 return hlen;
486 
487         t->encap.type = ipencap->type;
488         t->encap.sport = ipencap->sport;
489         t->encap.dport = ipencap->dport;
490         t->encap.flags = ipencap->flags;
491 
492         t->encap_hlen = hlen;
            /* Total tunnel header: encapsulation header plus tunnel protocol header. */
493         t->hlen = t->encap_hlen + t->tun_hlen;
494 
495         return 0;
496 }
497 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
498
    /*
     * Propagate the path MTU of the outer route to the inner packet's
     * route and reject packets that cannot be sent.
     *
     * @dev:       tunnel device
     * @skb:       packet about to be encapsulated
     * @rt:        route chosen for the outer packet
     * @df:        outer "don't fragment" setting; when non-zero the usable
     *             MTU is the route MTU minus link, outer IP and tunnel
     *             headers, otherwise the skb's dst MTU (or dev->mtu when
     *             the skb has no dst)
     * @inner_iph: inner IP header
     *
     * If the skb has a dst, its update_pmtu operation is invoked with the
     * computed MTU.  For a non-GSO IPv4 packet with DF set that exceeds
     * the MTU an ICMP "fragmentation needed" is sent; for a non-GSO IPv6
     * packet that exceeds an MTU of at least IPV6_MIN_MTU an ICMPv6
     * "packet too big" is sent.
     *
     * Returns 0 when the packet may be transmitted, -E2BIG when an ICMP
     * error was generated; the caller still owns the skb and the route.
     */
499 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
500                             struct rtable *rt, __be16 df,
501                             const struct iphdr *inner_iph)
502 {
503         struct ip_tunnel *tunnel = netdev_priv(dev);
504         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
505         int mtu;
506 
507         if (df)
508                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
509                                         - sizeof(struct iphdr) - tunnel->hlen;
510         else
511                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
512 
513         if (skb_dst(skb))
514                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
515 
516         if (skb->protocol == htons(ETH_P_IP)) {
517                 if (!skb_is_gso(skb) &&
518                     (inner_iph->frag_off & htons(IP_DF)) &&
519                     mtu < pkt_size) {
                            /* Clear the IP control block before handing the skb to icmp_send(). */
520                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
521                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
522                         return -E2BIG;
523                 }
524         }
525 #if IS_ENABLED(CONFIG_IPV6)
526         else if (skb->protocol == htons(ETH_P_IPV6)) {
527                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
528 
                    /*
                     * Record the smaller MTU on the inner IPv6 route, but only
                     * for a tunnel with a unicast remote or for a host route.
                     */
529                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
530                            mtu >= IPV6_MIN_MTU) {
531                         if ((tunnel->parms.iph.daddr &&
532                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
533                             rt6->rt6i_dst.plen == 128) {
534                                 rt6->rt6i_flags |= RTF_MODIFIED;
535                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
536                         }
537                 }
538 
539                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
540                                         mtu < pkt_size) {
541                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
542                         return -E2BIG;
543                 }
544         }
545 #endif
546         return 0;
547 }
548
    /*
     * Encapsulate @skb and transmit it through tunnel device @dev.
     *
     * @skb:        packet to send; always consumed
     * @dev:        tunnel device
     * @tnl_params: outer IPv4 header template (addresses, tos, ttl,
     *              frag_off)
     * @protocol:   IP protocol number of the outer header; may be replaced
     *              by ip_tunnel_encap()
     *
     * Steps: pick the outer destination (from the template, or for an NBMA
     * tunnel from the inner packet's route/neighbour), pick the TOS, build
     * the flow, apply the optional extra encapsulation, find a route
     * (using the per-tunnel dst cache while the tunnel is "connected"),
     * refuse routes that loop back into this device, run the PMTU check,
     * choose TTL and DF, make sure there is enough headroom and finally
     * hand the packet to iptunnel_xmit().
     *
     * Failures are accounted in dev->stats and the skb is freed.
     */
549 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
550                     const struct iphdr *tnl_params, u8 protocol)
551 {
552         struct ip_tunnel *tunnel = netdev_priv(dev);
553         const struct iphdr *inner_iph;
554         struct flowi4 fl4;
555         u8     tos, ttl;
556         __be16 df;
557         struct rtable *rt;              /* Route to the other host */
558         unsigned int max_headroom;      /* The extra header space needed */
559         __be32 dst;
560         bool connected;
561 
562         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
            /* "connected" gates use of the per-tunnel route cache further down. */
563         connected = (tunnel->parms.iph.daddr != 0);
564 
565         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
566 
567         dst = tnl_params->daddr;
568         if (dst == 0) {
569                 /* NBMA tunnel */
570 
571                 if (!skb_dst(skb)) {
572                         dev->stats.tx_fifo_errors++;
573                         goto tx_error;
574                 }
575 
576                 if (skb->protocol == htons(ETH_P_IP)) {
577                         rt = skb_rtable(skb);
578                         dst = rt_nexthop(rt, inner_iph->daddr);
579                 }
580 #if IS_ENABLED(CONFIG_IPV6)
581                 else if (skb->protocol == htons(ETH_P_IPV6)) {
582                         const struct in6_addr *addr6;
583                         struct neighbour *neigh;
584                         bool do_tx_error_icmp;
585                         int addr_type;
586 
587                         neigh = dst_neigh_lookup(skb_dst(skb),
588                                                  &ipv6_hdr(skb)->daddr);
589                         if (!neigh)
590                                 goto tx_error;
591 
592                         addr6 = (const struct in6_addr *)&neigh->primary_key;
593                         addr_type = ipv6_addr_type(addr6);
594 
595                         if (addr_type == IPV6_ADDR_ANY) {
596                                 addr6 = &ipv6_hdr(skb)->daddr;
597                                 addr_type = ipv6_addr_type(addr6);
598                         }
599 
                            /*
                             * Only an IPv4-compatible IPv6 address yields an
                             * outer IPv4 destination (its last 32 bits).
                             */
600                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
601                                 do_tx_error_icmp = true;
602                         else {
603                                 do_tx_error_icmp = false;
604                                 dst = addr6->s6_addr32[3];
605                         }
                            /* addr6 may point into neigh, so release only after using it. */
606                         neigh_release(neigh);
607                         if (do_tx_error_icmp)
608                                 goto tx_error_icmp;
609                 }
610 #endif
611                 else
612                         goto tx_error;
613 
614                 connected = false;
615         }
616 
617         tos = tnl_params->tos;
            /* Low bit set in the configured TOS: take the TOS from the inner header. */
618         if (tos & 0x1) {
619                 tos &= ~0x1;
620                 if (skb->protocol == htons(ETH_P_IP)) {
621                         tos = inner_iph->tos;
622                         connected = false;
623                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
624                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
625                         connected = false;
626                 }
627         }
628 
629         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
630                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
631 
632         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
633                 goto tx_error;
634 
635         rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
636                          NULL;
637 
638         if (!rt) {
639                 rt = ip_route_output_key(tunnel->net, &fl4);
640 
641                 if (IS_ERR(rt)) {
642                         dev->stats.tx_carrier_errors++;
643                         goto tx_error;
644                 }
645                 if (connected)
646                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
647                                           fl4.saddr);
648         }
649 
            /* A route that leads back to this tunnel device would loop. */
650         if (rt->dst.dev == dev) {
651                 ip_rt_put(rt);
652                 dev->stats.collisions++;
653                 goto tx_error;
654         }
655 
656         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
657                 ip_rt_put(rt);
658                 goto tx_error;
659         }
660 
            /*
             * While recorded errors are still within IPTUNNEL_ERR_TIMEO,
             * consume one per packet and report link failure to the sender.
             */
661         if (tunnel->err_count > 0) {
662                 if (time_before(jiffies,
663                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
664                         tunnel->err_count--;
665 
666                         dst_link_failure(skb);
667                 } else
668                         tunnel->err_count = 0;
669         }
670 
671         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
672         ttl = tnl_params->ttl;
            /* A configured TTL of 0 means: inherit from the inner packet or the route. */
673         if (ttl == 0) {
674                 if (skb->protocol == htons(ETH_P_IP))
675                         ttl = inner_iph->ttl;
676 #if IS_ENABLED(CONFIG_IPV6)
677                 else if (skb->protocol == htons(ETH_P_IPV6))
678                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
679 #endif
680                 else
681                         ttl = ip4_dst_hoplimit(&rt->dst);
682         }
683 
684         df = tnl_params->frag_off;
685         if (skb->protocol == htons(ETH_P_IP))
686                 df |= (inner_iph->frag_off&htons(IP_DF));
687 
688         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
689                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
            /* Only ever grow the advertised headroom. */
690         if (max_headroom > dev->needed_headroom)
691                 dev->needed_headroom = max_headroom;
692 
693         if (skb_cow_head(skb, dev->needed_headroom)) {
694                 ip_rt_put(rt);
695                 dev->stats.tx_dropped++;
696                 kfree_skb(skb);
697                 return;
698         }
699 
700         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
701                       df, !net_eq(tunnel->net, dev_net(dev)));
702         return;
703 
704 #if IS_ENABLED(CONFIG_IPV6)
705 tx_error_icmp:
706         dst_link_failure(skb);
707 #endif
708 tx_error:
709         dev->stats.tx_errors++;
710         kfree_skb(skb);
711 }
712 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
713
    /*
     * Apply new parameters @p to an existing tunnel @t on device @dev.
     *
     * The tunnel is unhashed, its addresses and keys are replaced, and it
     * is hashed again, because those fields determine its bucket.  For
     * non-Ethernet devices the device and broadcast addresses mirror the
     * tunnel endpoints.  ttl, tos and frag_off are then copied.  When the
     * link changes the device is re-bound and, if @set_mtu is true, its
     * MTU is updated.  Finally the route cache is reset and a state-change
     * notification is sent for the device.
     */
714 static void ip_tunnel_update(struct ip_tunnel_net *itn,
715                              struct ip_tunnel *t,
716                              struct net_device *dev,
717                              struct ip_tunnel_parm *p,
718                              bool set_mtu)
719 {
720         ip_tunnel_del(itn, t);
721         t->parms.iph.saddr = p->iph.saddr;
722         t->parms.iph.daddr = p->iph.daddr;
723         t->parms.i_key = p->i_key;
724         t->parms.o_key = p->o_key;
725         if (dev->type != ARPHRD_ETHER) {
726                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
727                 memcpy(dev->broadcast, &p->iph.daddr, 4);
728         }
729         ip_tunnel_add(itn, t);
730 
731         t->parms.iph.ttl = p->iph.ttl;
732         t->parms.iph.tos = p->iph.tos;
733         t->parms.iph.frag_off = p->iph.frag_off;
734 
735         if (t->parms.link != p->link) {
736                 int mtu;
737 
738                 t->parms.link = p->link;
739                 mtu = ip_tunnel_bind_dev(dev);
740                 if (set_mtu)
741                         dev->mtu = mtu;
742         }
743         dst_cache_reset(&t->dst_cache);
744         netdev_state_change(dev);
745 }
746
    /*
     * Handle the tunnel ioctls for device @dev.
     *
     * @dev: device the ioctl was issued on
     * @p:   tunnel parameters; input for all commands, overwritten with
     *       the tunnel's parameters by SIOCGETTUNNEL
     * @cmd: SIOCGETTUNNEL, SIOCADDTUNNEL, SIOCCHGTUNNEL or SIOCDELTUNNEL
     *
     * SIOCGETTUNNEL: on the fallback device, report the tunnel matching @p
     *   if one exists, otherwise the fallback tunnel; on any other device,
     *   report that device's tunnel.
     * SIOCADDTUNNEL: create a tunnel for @p; -EEXIST if one already
     *   matches.
     * SIOCCHGTUNNEL: update a tunnel.  On a non-fallback device the
     *   device's own tunnel is updated: -EEXIST if the new parameters
     *   match a tunnel on another device, -EINVAL if they would change
     *   the point-to-point/broadcast nature of the device.  On the
     *   fallback device the tunnel matching @p is updated, -ENOENT if
     *   there is none.
     * SIOCDELTUNNEL: unregister the device; on the fallback device, the
     *   tunnel matching @p is removed instead (-ENOENT if none, -EPERM if
     *   it is the fallback tunnel itself).
     *
     * All commands except SIOCGETTUNNEL require CAP_NET_ADMIN in the
     * tunnel's user namespace (-EPERM otherwise).  Unknown commands return
     * -EINVAL.  BUGs if the per-net state has no fallback device.
     *
     * Returns 0 on success or a negative errno.
     */
747 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
748 {
749         int err = 0;
750         struct ip_tunnel *t = netdev_priv(dev);
751         struct net *net = t->net;
752         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
753 
754         BUG_ON(!itn->fb_tunnel_dev);
755         switch (cmd) {
756         case SIOCGETTUNNEL:
757                 if (dev == itn->fb_tunnel_dev) {
758                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
759                         if (!t)
760                                 t = netdev_priv(dev);
761                 }
762                 memcpy(p, &t->parms, sizeof(*p));
763                 break;
764 
765         case SIOCADDTUNNEL:
766         case SIOCCHGTUNNEL:
767                 err = -EPERM;
768                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
769                         goto done;
                    /* A fixed TTL forces DF on the outer header. */
770                 if (p->iph.ttl)
771                         p->iph.frag_off |= htons(IP_DF);
                    /* Except for VTI, keys are only meaningful with TUNNEL_KEY set. */
772                 if (!(p->i_flags & VTI_ISVTI)) {
773                         if (!(p->i_flags & TUNNEL_KEY))
774                                 p->i_key = 0;
775                         if (!(p->o_flags & TUNNEL_KEY))
776                                 p->o_key = 0;
777                 }
778 
779                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
780 
781                 if (cmd == SIOCADDTUNNEL) {
782                         if (!t) {
783                                 t = ip_tunnel_create(net, itn, p);
784                                 err = PTR_ERR_OR_ZERO(t);
785                                 break;
786                         }
787 
788                         err = -EEXIST;
789                         break;
790                 }
791                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
792                         if (t) {
793                                 if (t->dev != dev) {
794                                         err = -EEXIST;
795                                         break;
796                                 }
797                         } else {
798                                 unsigned int nflags = 0;
799 
800                                 if (ipv4_is_multicast(p->iph.daddr))
801                                         nflags = IFF_BROADCAST;
802                                 else if (p->iph.daddr)
803                                         nflags = IFF_POINTOPOINT;
804 
                                    /* The change must not alter the device's P2P/broadcast flags. */
805                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
806                                         err = -EINVAL;
807                                         break;
808                                 }
809 
810                                 t = netdev_priv(dev);
811                         }
812                 }
813 
814                 if (t) {
815                         err = 0;
816                         ip_tunnel_update(itn, t, dev, p, true);
817                 } else {
818                         err = -ENOENT;
819                 }
820                 break;
821 
822         case SIOCDELTUNNEL:
823                 err = -EPERM;
824                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
825                         goto done;
826 
827                 if (dev == itn->fb_tunnel_dev) {
828                         err = -ENOENT;
829                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
830                         if (!t)
831                                 goto done;
832                         err = -EPERM;
833                         if (t == netdev_priv(itn->fb_tunnel_dev))
834                                 goto done;
835                         dev = t->dev;
836                 }
837                 unregister_netdevice(dev);
838                 err = 0;
839                 break;
840 
841         default:
842                 err = -EINVAL;
843         }
844 
845 done:
846         return err;
847 }
848 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
849 
850 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
851 {
852         struct ip_tunnel *tunnel = netdev_priv(dev);
853         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
854         int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
855 
856         if (new_mtu < 68)
857                 return -EINVAL;
858 
859         if (new_mtu > max_mtu) {
860                 if (strict)
861                         return -EINVAL;
862 
863                 new_mtu = max_mtu;
864         }
865 
866         dev->mtu = new_mtu;
867         return 0;
868 }
869 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
870 
871 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
872 {
873         return __ip_tunnel_change_mtu(dev, new_mtu, true);
874 }
875 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
876 
877 static void ip_tunnel_dev_free(struct net_device *dev)
878 {
879         struct ip_tunnel *tunnel = netdev_priv(dev);
880 
881         gro_cells_destroy(&tunnel->gro_cells);
882         dst_cache_destroy(&tunnel->dst_cache);
883         free_percpu(dev->tstats);
884         free_netdev(dev);
885 }
886 
887 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
888 {
889         struct ip_tunnel *tunnel = netdev_priv(dev);
890         struct ip_tunnel_net *itn;
891 
892         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
893 
894         if (itn->fb_tunnel_dev != dev) {
895                 ip_tunnel_del(itn, netdev_priv(dev));
896                 unregister_netdevice_queue(dev, head);
897         }
898 }
899 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
900 
901 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
902 {
903         struct ip_tunnel *tunnel = netdev_priv(dev);
904 
905         return tunnel->net;
906 }
907 EXPORT_SYMBOL(ip_tunnel_get_link_net);
908 
909 int ip_tunnel_get_iflink(const struct net_device *dev)
910 {
911         struct ip_tunnel *tunnel = netdev_priv(dev);
912 
913         return tunnel->parms.link;
914 }
915 EXPORT_SYMBOL(ip_tunnel_get_iflink);
916 
917 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
918                                   struct rtnl_link_ops *ops, char *devname)
919 {
920         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
921         struct ip_tunnel_parm parms;
922         unsigned int i;
923 
924         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
925                 INIT_HLIST_HEAD(&itn->tunnels[i]);
926 
927         if (!ops) {
928                 itn->fb_tunnel_dev = NULL;
929                 return 0;
930         }
931 
932         memset(&parms, 0, sizeof(parms));
933         if (devname)
934                 strlcpy(parms.name, devname, IFNAMSIZ);
935 
936         rtnl_lock();
937         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
938         /* FB netdevice is special: we have one, and only one per netns.
939          * Allowing to move it to another netns is clearly unsafe.
940          */
941         if (!IS_ERR(itn->fb_tunnel_dev)) {
942                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
943                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
944                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
945         }
946         rtnl_unlock();
947 
948         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
949 }
950 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
951 
952 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
953                               struct rtnl_link_ops *ops)
954 {
955         struct net *net = dev_net(itn->fb_tunnel_dev);
956         struct net_device *dev, *aux;
957         int h;
958 
959         for_each_netdev_safe(net, dev, aux)
960                 if (dev->rtnl_link_ops == ops)
961                         unregister_netdevice_queue(dev, head);
962 
963         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
964                 struct ip_tunnel *t;
965                 struct hlist_node *n;
966                 struct hlist_head *thead = &itn->tunnels[h];
967 
968                 hlist_for_each_entry_safe(t, n, thead, hash_node)
969                         /* If dev is in the same netns, it has already
970                          * been added to the list by the previous loop.
971                          */
972                         if (!net_eq(dev_net(t->dev), net))
973                                 unregister_netdevice_queue(t->dev, head);
974         }
975 }
976 
977 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
978 {
979         LIST_HEAD(list);
980 
981         rtnl_lock();
982         ip_tunnel_destroy(itn, &list, ops);
983         unregister_netdevice_many(&list);
984         rtnl_unlock();
985 }
986 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
987 
988 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
989                       struct ip_tunnel_parm *p)
990 {
991         struct ip_tunnel *nt;
992         struct net *net = dev_net(dev);
993         struct ip_tunnel_net *itn;
994         int mtu;
995         int err;
996 
997         nt = netdev_priv(dev);
998         itn = net_generic(net, nt->ip_tnl_net_id);
999 
1000         if (nt->collect_md) {
1001                 if (rtnl_dereference(itn->collect_md_tun))
1002                         return -EEXIST;
1003         } else {
1004                 if (ip_tunnel_find(itn, p, dev->type))
1005                         return -EEXIST;
1006         }
1007 
1008         nt->net = net;
1009         nt->parms = *p;
1010         err = register_netdevice(dev);
1011         if (err)
1012                 goto out;
1013 
1014         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1015                 eth_hw_addr_random(dev);
1016 
1017         mtu = ip_tunnel_bind_dev(dev);
1018         if (!tb[IFLA_MTU])
1019                 dev->mtu = mtu;
1020 
1021         ip_tunnel_add(itn, nt);
1022 out:
1023         return err;
1024 }
1025 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1026 
1027 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1028                          struct ip_tunnel_parm *p)
1029 {
1030         struct ip_tunnel *t;
1031         struct ip_tunnel *tunnel = netdev_priv(dev);
1032         struct net *net = tunnel->net;
1033         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1034 
1035         if (dev == itn->fb_tunnel_dev)
1036                 return -EINVAL;
1037 
1038         t = ip_tunnel_find(itn, p, dev->type);
1039 
1040         if (t) {
1041                 if (t->dev != dev)
1042                         return -EEXIST;
1043         } else {
1044                 t = tunnel;
1045 
1046                 if (dev->type != ARPHRD_ETHER) {
1047                         unsigned int nflags = 0;
1048 
1049                         if (ipv4_is_multicast(p->iph.daddr))
1050                                 nflags = IFF_BROADCAST;
1051                         else if (p->iph.daddr)
1052                                 nflags = IFF_POINTOPOINT;
1053 
1054                         if ((dev->flags ^ nflags) &
1055                             (IFF_POINTOPOINT | IFF_BROADCAST))
1056                                 return -EINVAL;
1057                 }
1058         }
1059 
1060         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
1061         return 0;
1062 }
1063 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1064 
1065 int ip_tunnel_init(struct net_device *dev)
1066 {
1067         struct ip_tunnel *tunnel = netdev_priv(dev);
1068         struct iphdr *iph = &tunnel->parms.iph;
1069         int err;
1070 
1071         dev->destructor = ip_tunnel_dev_free;
1072         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1073         if (!dev->tstats)
1074                 return -ENOMEM;
1075 
1076         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1077         if (err) {
1078                 free_percpu(dev->tstats);
1079                 return err;
1080         }
1081 
1082         err = gro_cells_init(&tunnel->gro_cells, dev);
1083         if (err) {
1084                 dst_cache_destroy(&tunnel->dst_cache);
1085                 free_percpu(dev->tstats);
1086                 return err;
1087         }
1088 
1089         tunnel->dev = dev;
1090         tunnel->net = dev_net(dev);
1091         strcpy(tunnel->parms.name, dev->name);
1092         iph->version            = 4;
1093         iph->ihl                = 5;
1094 
1095         if (tunnel->collect_md) {
1096                 dev->features |= NETIF_F_NETNS_LOCAL;
1097                 netif_keep_dst(dev);
1098         }
1099         return 0;
1100 }
1101 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1102 
1103 void ip_tunnel_uninit(struct net_device *dev)
1104 {
1105         struct ip_tunnel *tunnel = netdev_priv(dev);
1106         struct net *net = tunnel->net;
1107         struct ip_tunnel_net *itn;
1108 
1109         itn = net_generic(net, tunnel->ip_tnl_net_id);
1110         /* fb_tunnel_dev will be unregisted in net-exit call. */
1111         if (itn->fb_tunnel_dev != dev)
1112                 ip_tunnel_del(itn, netdev_priv(dev));
1113 
1114         dst_cache_reset(&tunnel->dst_cache);
1115 }
1116 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1117 
1118 /* Do least required initialization, rest of init is done in tunnel_init call */
1119 void ip_tunnel_setup(struct net_device *dev, int net_id)
1120 {
1121         struct ip_tunnel *tunnel = netdev_priv(dev);
1122         tunnel->ip_tnl_net_id = net_id;
1123 }
1124 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1125 
1126 MODULE_LICENSE("GPL");
1127 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp