~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_tunnel.c

Version: ~ [ linux-5.12 ] ~ [ linux-5.11.16 ] ~ [ linux-5.10.32 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.114 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.188 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.231 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.267 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.267 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Copyright (c) 2013 Nicira, Inc.
  3  *
  4  * This program is free software; you can redistribute it and/or
  5  * modify it under the terms of version 2 of the GNU General Public
  6  * License as published by the Free Software Foundation.
  7  *
  8  * This program is distributed in the hope that it will be useful, but
  9  * WITHOUT ANY WARRANTY; without even the implied warranty of
 10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 11  * General Public License for more details.
 12  *
 13  * You should have received a copy of the GNU General Public License
 14  * along with this program; if not, write to the Free Software
 15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 16  * 02110-1301, USA
 17  */
 18 
 19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 20 
 21 #include <linux/capability.h>
 22 #include <linux/module.h>
 23 #include <linux/types.h>
 24 #include <linux/kernel.h>
 25 #include <linux/slab.h>
 26 #include <linux/uaccess.h>
 27 #include <linux/skbuff.h>
 28 #include <linux/netdevice.h>
 29 #include <linux/in.h>
 30 #include <linux/tcp.h>
 31 #include <linux/udp.h>
 32 #include <linux/if_arp.h>
 33 #include <linux/init.h>
 34 #include <linux/in6.h>
 35 #include <linux/inetdevice.h>
 36 #include <linux/igmp.h>
 37 #include <linux/netfilter_ipv4.h>
 38 #include <linux/etherdevice.h>
 39 #include <linux/if_ether.h>
 40 #include <linux/if_vlan.h>
 41 #include <linux/rculist.h>
 42 #include <linux/err.h>
 43 
 44 #include <net/sock.h>
 45 #include <net/ip.h>
 46 #include <net/icmp.h>
 47 #include <net/protocol.h>
 48 #include <net/ip_tunnels.h>
 49 #include <net/arp.h>
 50 #include <net/checksum.h>
 51 #include <net/dsfield.h>
 52 #include <net/inet_ecn.h>
 53 #include <net/xfrm.h>
 54 #include <net/net_namespace.h>
 55 #include <net/netns/generic.h>
 56 #include <net/rtnetlink.h>
 57 #include <net/udp.h>
 58 #include <net/dst_metadata.h>
 59 
 60 #if IS_ENABLED(CONFIG_IPV6)
 61 #include <net/ipv6.h>
 62 #include <net/ip6_fib.h>
 63 #include <net/ip6_route.h>
 64 #endif
 65 
 66 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 67 {
 68         return hash_32((__force u32)key ^ (__force u32)remote,
 69                          IP_TNL_HASH_BITS);
 70 }
 71 
 72 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
 73                                 __be16 flags, __be32 key)
 74 {
 75         if (p->i_flags & TUNNEL_KEY) {
 76                 if (flags & TUNNEL_KEY)
 77                         return key == p->i_key;
 78                 else
 79                         /* key expected, none present */
 80                         return false;
 81         } else
 82                 return !(flags & TUNNEL_KEY);
 83 }
 84 
 85 /* Fallback tunnel: no source, no destination, no key, no options
 86 
 87    Tunnel hash table:
 88    We require exact key match i.e. if a key is present in packet
 89    it will match only tunnel with the same key; if it is not present,
 90    it will match only keyless tunnel.
 91 
 92    All keyless packets that match no configured keyless tunnel
 93    will match the fallback tunnel.
 94    Given src, dst and key, find the appropriate tunnel for input.

       Lookup order. In every pass the tunnel's device must be IFF_UP, and a
       tunnel whose parms.link equals @link is returned immediately; any
       other match is only remembered as a candidate:
         1. tunnel local and remote addresses both equal @local / @remote;
         2. tunnel remote equals @remote and the tunnel has no local address;
         3. tunnel local equals @local and the tunnel has no remote address,
            or @local is multicast and equals the tunnel's remote address;
         4. (skipped when TUNNEL_NO_KEY is set in @flags) the tunnel's i_key
            equals @key and the tunnel has neither local nor remote address.
       Passes 1-3 additionally require ip_tunnel_key_match() to succeed.
       If no tunnel matched on link, the candidate is returned; failing
       that, the collect_md tunnel if one is set, then the fallback device's
       tunnel if that device is up, otherwise NULL.
 95 */
 96 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
 97                                    int link, __be16 flags,
 98                                    __be32 remote, __be32 local,
 99                                    __be32 key)
100 {
101         unsigned int hash;
102         struct ip_tunnel *t, *cand = NULL;
103         struct hlist_head *head;
104 
            /* Passes 1 and 2 walk the bucket selected by (key, remote). */
105         hash = ip_tunnel_hash(key, remote);
106         head = &itn->tunnels[hash];
107 
            /* Pass 1: exact local + remote match. */
108         hlist_for_each_entry_rcu(t, head, hash_node) {
109                 if (local != t->parms.iph.saddr ||
110                     remote != t->parms.iph.daddr ||
111                     !(t->dev->flags & IFF_UP))
112                         continue;
113 
114                 if (!ip_tunnel_key_match(&t->parms, flags, key))
115                         continue;
116 
117                 if (t->parms.link == link)
118                         return t;
119                 else
                            /* Unlike later passes, a later entry here replaces
                             * an earlier candidate.
                             */
120                         cand = t;
121         }
122 
            /* Pass 2: remote matches, tunnel local address is unset (0). */
123         hlist_for_each_entry_rcu(t, head, hash_node) {
124                 if (remote != t->parms.iph.daddr ||
125                     t->parms.iph.saddr != 0 ||
126                     !(t->dev->flags & IFF_UP))
127                         continue;
128 
129                 if (!ip_tunnel_key_match(&t->parms, flags, key))
130                         continue;
131 
132                 if (t->parms.link == link)
133                         return t;
134                 else if (!cand)
135                         cand = t;
136         }
137 
            /* Passes 3 and 4 walk the bucket selected by (key, 0). */
138         hash = ip_tunnel_hash(key, 0);
139         head = &itn->tunnels[hash];
140 
            /* Pass 3: skip unless (local matches and tunnel remote is unset)
             * or (local is multicast and equals the tunnel remote).
             */
141         hlist_for_each_entry_rcu(t, head, hash_node) {
142                 if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) &&
143                     (local != t->parms.iph.daddr || !ipv4_is_multicast(local)))
144                         continue;
145 
146                 if (!(t->dev->flags & IFF_UP))
147                         continue;
148 
149                 if (!ip_tunnel_key_match(&t->parms, flags, key))
150                         continue;
151 
152                 if (t->parms.link == link)
153                         return t;
154                 else if (!cand)
155                         cand = t;
156         }
157 
158         if (flags & TUNNEL_NO_KEY)
159                 goto skip_key_lookup;
160 
            /* Pass 4: address-less tunnels selected by i_key alone. Note that
             * i_key is compared directly; ip_tunnel_key_match() is not used.
             */
161         hlist_for_each_entry_rcu(t, head, hash_node) {
162                 if (t->parms.i_key != key ||
163                     t->parms.iph.saddr != 0 ||
164                     t->parms.iph.daddr != 0 ||
165                     !(t->dev->flags & IFF_UP))
166                         continue;
167 
168                 if (t->parms.link == link)
169                         return t;
170                 else if (!cand)
171                         cand = t;
172         }
173 
174 skip_key_lookup:
            /* No tunnel matched on link; settle for the remembered candidate. */
175         if (cand)
176                 return cand;
177 
            /* Next choice: the collect_md tunnel, when one is set. */
178         t = rcu_dereference(itn->collect_md_tun);
179         if (t)
180                 return t;
181 
            /* Last resort: the fallback tunnel, but only while its device is up. */
182         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
183                 return netdev_priv(itn->fb_tunnel_dev);
184 
185         return NULL;
186 }
187 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
188 
189 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
190                                     struct ip_tunnel_parm *parms)
191 {
192         unsigned int h;
193         __be32 remote;
194         __be32 i_key = parms->i_key;
195 
196         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
197                 remote = parms->iph.daddr;
198         else
199                 remote = 0;
200 
201         if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
202                 i_key = 0;
203 
204         h = ip_tunnel_hash(i_key, remote);
205         return &itn->tunnels[h];
206 }
207 
208 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
209 {
210         struct hlist_head *head = ip_bucket(itn, &t->parms);
211 
212         if (t->collect_md)
213                 rcu_assign_pointer(itn->collect_md_tun, t);
214         hlist_add_head_rcu(&t->hash_node, head);
215 }
216 
/*
 * Unpublish tunnel @t from @itn: when t->collect_md is set, clear the
 * per-netns collect_md_tun pointer, then unlink @t from its hash chain.
 */
217 static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
218 {
            /* Cleared unconditionally; the pointer is not compared against @t. */
219         if (t->collect_md)
220                 rcu_assign_pointer(itn->collect_md_tun, NULL);
221         hlist_del_init_rcu(&t->hash_node);
222 }
223 
224 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
225                                         struct ip_tunnel_parm *parms,
226                                         int type)
227 {
228         __be32 remote = parms->iph.daddr;
229         __be32 local = parms->iph.saddr;
230         __be32 key = parms->i_key;
231         __be16 flags = parms->i_flags;
232         int link = parms->link;
233         struct ip_tunnel *t = NULL;
234         struct hlist_head *head = ip_bucket(itn, parms);
235 
236         hlist_for_each_entry_rcu(t, head, hash_node) {
237                 if (local == t->parms.iph.saddr &&
238                     remote == t->parms.iph.daddr &&
239                     link == t->parms.link &&
240                     type == t->dev->type &&
241                     ip_tunnel_key_match(&t->parms, flags, key))
242                         break;
243         }
244         return t;
245 }
246 
247 static struct net_device *__ip_tunnel_create(struct net *net,
248                                              const struct rtnl_link_ops *ops,
249                                              struct ip_tunnel_parm *parms)
250 {
251         int err;
252         struct ip_tunnel *tunnel;
253         struct net_device *dev;
254         char name[IFNAMSIZ];
255 
256         if (parms->name[0])
257                 strlcpy(name, parms->name, IFNAMSIZ);
258         else {
259                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
260                         err = -E2BIG;
261                         goto failed;
262                 }
263                 strlcpy(name, ops->kind, IFNAMSIZ);
264                 strncat(name, "%d", 2);
265         }
266 
267         ASSERT_RTNL();
268         dev = alloc_netdev(ops->priv_size, name, NET_NAME_UNKNOWN, ops->setup);
269         if (!dev) {
270                 err = -ENOMEM;
271                 goto failed;
272         }
273         dev_net_set(dev, net);
274 
275         dev->rtnl_link_ops = ops;
276 
277         tunnel = netdev_priv(dev);
278         tunnel->parms = *parms;
279         tunnel->net = net;
280 
281         err = register_netdevice(dev);
282         if (err)
283                 goto failed_free;
284 
285         return dev;
286 
287 failed_free:
288         free_netdev(dev);
289 failed:
290         return ERR_PTR(err);
291 }
292 
293 static inline void init_tunnel_flow(struct flowi4 *fl4,
294                                     int proto,
295                                     __be32 daddr, __be32 saddr,
296                                     __be32 key, __u8 tos, int oif,
297                                     __u32 mark)
298 {
299         memset(fl4, 0, sizeof(*fl4));
300         fl4->flowi4_oif = oif;
301         fl4->daddr = daddr;
302         fl4->saddr = saddr;
303         fl4->flowi4_tos = tos;
304         fl4->flowi4_proto = proto;
305         fl4->fl4_gre_key = key;
306         fl4->flowi4_mark = mark;
307 }
308 
309 static int ip_tunnel_bind_dev(struct net_device *dev)
310 {
311         struct net_device *tdev = NULL;
312         struct ip_tunnel *tunnel = netdev_priv(dev);
313         const struct iphdr *iph;
314         int hlen = LL_MAX_HEADER;
315         int mtu = ETH_DATA_LEN;
316         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
317 
318         iph = &tunnel->parms.iph;
319 
320         /* Guess output device to choose reasonable mtu and needed_headroom */
321         if (iph->daddr) {
322                 struct flowi4 fl4;
323                 struct rtable *rt;
324 
325                 init_tunnel_flow(&fl4, iph->protocol, iph->daddr,
326                                  iph->saddr, tunnel->parms.o_key,
327                                  RT_TOS(iph->tos), tunnel->parms.link,
328                                  tunnel->fwmark);
329                 rt = ip_route_output_key(tunnel->net, &fl4);
330 
331                 if (!IS_ERR(rt)) {
332                         tdev = rt->dst.dev;
333                         ip_rt_put(rt);
334                 }
335                 if (dev->type != ARPHRD_ETHER)
336                         dev->flags |= IFF_POINTOPOINT;
337 
338                 dst_cache_reset(&tunnel->dst_cache);
339         }
340 
341         if (!tdev && tunnel->parms.link)
342                 tdev = __dev_get_by_index(tunnel->net, tunnel->parms.link);
343 
344         if (tdev) {
345                 hlen = tdev->hard_header_len + tdev->needed_headroom;
346                 mtu = tdev->mtu;
347         }
348 
349         dev->needed_headroom = t_hlen + hlen;
350         mtu -= (dev->hard_header_len + t_hlen);
351 
352         if (mtu < 68)
353                 mtu = 68;
354 
355         return mtu;
356 }
357 
358 static struct ip_tunnel *ip_tunnel_create(struct net *net,
359                                           struct ip_tunnel_net *itn,
360                                           struct ip_tunnel_parm *parms)
361 {
362         struct ip_tunnel *nt;
363         struct net_device *dev;
364         int t_hlen;
365 
366         BUG_ON(!itn->fb_tunnel_dev);
367         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
368         if (IS_ERR(dev))
369                 return ERR_CAST(dev);
370 
371         dev->mtu = ip_tunnel_bind_dev(dev);
372 
373         nt = netdev_priv(dev);
374         t_hlen = nt->hlen + sizeof(struct iphdr);
375         dev->min_mtu = ETH_MIN_MTU;
376         dev->max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
377         ip_tunnel_add(itn, nt);
378         return nt;
379 }
380 
/*
 * Receive a decapsulated packet on @tunnel.
 *
 * @tunnel:        tunnel the packet was matched to
 * @skb:           the packet
 * @tpi:           parsed tunnel header info (flags, seq)
 * @tun_dst:       optional metadata dst; attached to @skb on success and
 *                 released on drop
 * @log_ecn_error: whether to log a (rate-limited) message on ECN
 *                 decapsulation errors
 *
 * Checks the packet's checksum and sequence flags against the tunnel's
 * input flags, performs ECN decapsulation, updates the per-cpu rx counters
 * and hands the packet to gro_cells_receive(). Dropped packets are freed
 * with kfree_skb() after the matching error counters are bumped.
 *
 * Always returns 0, whether the packet was delivered or dropped.
 */
381 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
382                   const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
383                   bool log_ecn_error)
384 {
385         struct pcpu_sw_netstats *tstats;
            /* Captured before skb_reset_network_header() below; presumably
             * the outer IP header -- confirm against callers.
             */
386         const struct iphdr *iph = ip_hdr(skb);
387         int err;
388 
389 #ifdef CONFIG_NET_IPGRE_BROADCAST
390         if (ipv4_is_multicast(iph->daddr)) {
391                 tunnel->dev->stats.multicast++;
392                 skb->pkt_type = PACKET_BROADCAST;
393         }
394 #endif
395 
            /* Drop when the packet and the tunnel disagree about TUNNEL_CSUM. */
396         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
397              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
398                 tunnel->dev->stats.rx_crc_errors++;
399                 tunnel->dev->stats.rx_errors++;
400                 goto drop;
401         }
402 
            /* Sequenced tunnel: drop packets without a sequence number, or
             * whose number is behind the expected one (signed 32-bit
             * difference); otherwise expect seq + 1 next.
             */
403         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
404                 if (!(tpi->flags&TUNNEL_SEQ) ||
405                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
406                         tunnel->dev->stats.rx_fifo_errors++;
407                         tunnel->dev->stats.rx_errors++;
408                         goto drop;
409                 }
410                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
411         }
412 
413         skb_reset_network_header(skb);
414 
415         err = IP_ECN_decapsulate(iph, skb);
416         if (unlikely(err)) {
417                 if (log_ecn_error)
418                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
419                                         &iph->saddr, iph->tos);
                    /* Only err > 1 drops the packet; err == 1 is logged at most. */
420                 if (err > 1) {
421                         ++tunnel->dev->stats.rx_frame_errors;
422                         ++tunnel->dev->stats.rx_errors;
423                         goto drop;
424                 }
425         }
426 
427         tstats = this_cpu_ptr(tunnel->dev->tstats);
428         u64_stats_update_begin(&tstats->syncp);
429         tstats->rx_packets++;
430         tstats->rx_bytes += skb->len;
431         u64_stats_update_end(&tstats->syncp);
432 
            /* Second argument is true when the tunnel's netns differs from
             * the netns of its device.
             */
433         skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
434 
435         if (tunnel->dev->type == ARPHRD_ETHER) {
436                 skb->protocol = eth_type_trans(skb, tunnel->dev);
437                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
438         } else {
439                 skb->dev = tunnel->dev;
440         }
441 
442         if (tun_dst)
443                 skb_dst_set(skb, (struct dst_entry *)tun_dst);
444 
445         gro_cells_receive(&tunnel->gro_cells, skb);
446         return 0;
447 
448 drop:
            /* tun_dst was never attached to the skb on this path; release it here. */
449         if (tun_dst)
450                 dst_release((struct dst_entry *)tun_dst);
451         kfree_skb(skb);
452         return 0;
453 }
454 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
455 
456 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
457                             unsigned int num)
458 {
459         if (num >= MAX_IPTUN_ENCAP_OPS)
460                 return -ERANGE;
461 
462         return !cmpxchg((const struct ip_tunnel_encap_ops **)
463                         &iptun_encaps[num],
464                         NULL, ops) ? 0 : -1;
465 }
466 EXPORT_SYMBOL(ip_tunnel_encap_add_ops);
467 
468 int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *ops,
469                             unsigned int num)
470 {
471         int ret;
472 
473         if (num >= MAX_IPTUN_ENCAP_OPS)
474                 return -ERANGE;
475 
476         ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
477                        &iptun_encaps[num],
478                        ops, NULL) == ops) ? 0 : -1;
479 
480         synchronize_net();
481 
482         return ret;
483 }
484 EXPORT_SYMBOL(ip_tunnel_encap_del_ops);
485 
486 int ip_tunnel_encap_setup(struct ip_tunnel *t,
487                           struct ip_tunnel_encap *ipencap)
488 {
489         int hlen;
490 
491         memset(&t->encap, 0, sizeof(t->encap));
492 
493         hlen = ip_encap_hlen(ipencap);
494         if (hlen < 0)
495                 return hlen;
496 
497         t->encap.type = ipencap->type;
498         t->encap.sport = ipencap->sport;
499         t->encap.dport = ipencap->dport;
500         t->encap.flags = ipencap->flags;
501 
502         t->encap_hlen = hlen;
503         t->hlen = t->encap_hlen + t->tun_hlen;
504 
505         return 0;
506 }
507 EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
508 
/*
 * Work out the MTU available to the inner packet, propagate it to the skb's
 * dst, and signal "packet too big" to the sender when needed.
 *
 * @dev:       tunnel device
 * @skb:       packet about to be encapsulated
 * @rt:        outer route
 * @df:        outer DF setting; non-zero selects the route-derived MTU
 * @inner_iph: inner header, read as an IPv4 header for the DF check
 *
 * Returns 0 when transmission may continue, or -E2BIG after an ICMP /
 * ICMPv6 error has been sent for a packet larger than the computed MTU.
 */
509 static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
510                             struct rtable *rt, __be16 df,
511                             const struct iphdr *inner_iph)
512 {
513         struct ip_tunnel *tunnel = netdev_priv(dev);
            /* Size used for the "too big" comparisons below. */
514         int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
515         int mtu;
516 
            /* With DF: outer route MTU minus link, IP and tunnel headers.
             * Without DF: the MTU of the skb's own dst, or the device MTU.
             */
517         if (df)
518                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
519                                         - sizeof(struct iphdr) - tunnel->hlen;
520         else
521                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
522 
523         if (skb_dst(skb))
524                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
525 
526         if (skb->protocol == htons(ETH_P_IP)) {
                    /* Non-GSO IPv4 packet with DF set that exceeds the MTU:
                     * report "fragmentation needed" and refuse to send.
                     */
527                 if (!skb_is_gso(skb) &&
528                     (inner_iph->frag_off & htons(IP_DF)) &&
529                     mtu < pkt_size) {
530                         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
531                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
532                         return -E2BIG;
533                 }
534         }
535 #if IS_ENABLED(CONFIG_IPV6)
536         else if (skb->protocol == htons(ETH_P_IPV6)) {
537                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
538 
                    /* Lower the RTAX_MTU metric of the inner IPv6 route, but
                     * only for tunnels with a unicast remote or for routes
                     * with a /128 destination.
                     */
539                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
540                            mtu >= IPV6_MIN_MTU) {
541                         if ((tunnel->parms.iph.daddr &&
542                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
543                             rt6->rt6i_dst.plen == 128) {
544                                 rt6->rt6i_flags |= RTF_MODIFIED;
545                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
546                         }
547                 }
548 
                    /* No error is sent when mtu is below IPV6_MIN_MTU. */
549                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
550                                         mtu < pkt_size) {
551                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
552                         return -E2BIG;
553                 }
554         }
555 #endif
556         return 0;
557 }
558 
/*
 * Transmit @skb on tunnel device @dev using the per-packet tunnel metadata
 * attached to the skb (skb_tunnel_info()), with outer IP protocol @proto.
 *
 * The packet is counted in tx_errors and freed when: no TX-mode IPv4 tunnel
 * info is attached, the tunnel has an encapsulation type other than
 * TUNNEL_ENCAP_NONE, the route lookup fails, or the route leads back to
 * @dev itself. It is counted in tx_dropped and freed when headroom cannot
 * be made available.
 */
559 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
560 {
561         struct ip_tunnel *tunnel = netdev_priv(dev);
562         u32 headroom = sizeof(struct iphdr);
563         struct ip_tunnel_info *tun_info;
564         const struct ip_tunnel_key *key;
565         const struct iphdr *inner_iph;
566         struct rtable *rt;
567         struct flowi4 fl4;
568         __be16 df = 0;
569         u8 tos, ttl;
570 
571         tun_info = skb_tunnel_info(skb);
572         if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
573                      ip_tunnel_info_af(tun_info) != AF_INET))
574                 goto tx_error;
575         key = &tun_info->key;
576         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
577         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
578         tos = key->tos;
            /* A key TOS of exactly 1 means: take the TOS from the inner header. */
579         if (tos == 1) {
580                 if (skb->protocol == htons(ETH_P_IP))
581                         tos = inner_iph->tos;
582                 else if (skb->protocol == htons(ETH_P_IPV6))
583                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
584         }
585         init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
586                          RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
            /* Any configured encapsulation type is rejected on this path. */
587         if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
588                 goto tx_error;
589         rt = ip_route_output_key(tunnel->net, &fl4);
590         if (IS_ERR(rt)) {
591                 dev->stats.tx_carrier_errors++;
592                 goto tx_error;
593         }
            /* The route points back at the tunnel device itself. */
594         if (rt->dst.dev == dev) {
595                 ip_rt_put(rt);
596                 dev->stats.collisions++;
597                 goto tx_error;
598         }
599         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
600         ttl = key->ttl;
            /* A key TTL of 0 means: inherit from the inner header, or use the
             * route's hop limit for other protocols.
             */
601         if (ttl == 0) {
602                 if (skb->protocol == htons(ETH_P_IP))
603                         ttl = inner_iph->ttl;
604                 else if (skb->protocol == htons(ETH_P_IPV6))
605                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
606                 else
607                         ttl = ip4_dst_hoplimit(&rt->dst);
608         }
            /* DF is forced by the key flag, else copied from an inner IPv4 header. */
609         if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
610                 df = htons(IP_DF);
611         else if (skb->protocol == htons(ETH_P_IP))
612                 df = inner_iph->frag_off & htons(IP_DF);
            /* needed_headroom only ever grows here; it is never lowered. */
613         headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
614         if (headroom > dev->needed_headroom)
615                 dev->needed_headroom = headroom;
616 
617         if (skb_cow_head(skb, dev->needed_headroom)) {
618                 ip_rt_put(rt);
619                 goto tx_dropped;
620         }
621         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
622                       df, !net_eq(tunnel->net, dev_net(dev)));
623         return;
624 tx_error:
625         dev->stats.tx_errors++;
626         goto kfree;
627 tx_dropped:
628         dev->stats.tx_dropped++;
629 kfree:
630         kfree_skb(skb);
631 }
632 EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit);
633 
/*
 * Encapsulate and transmit @skb over tunnel device @dev.
 *
 * @skb:        packet to send
 * @dev:        tunnel device
 * @tnl_params: outer IPv4 header template (daddr, saddr, tos, ttl, frag_off)
 * @protocol:   outer IP protocol; may be rewritten by ip_tunnel_encap()
 *
 * Steps: resolve the outer destination (from the template, or per packet
 * for NBMA tunnels), choose TOS, apply optional encapsulation, find a route
 * (through the tunnel's dst cache when the tunnel is "connected"), run the
 * PMTU check, pick TTL and DF, ensure headroom and call iptunnel_xmit().
 *
 * On failure the skb is freed here and a tx error counter on @dev is
 * incremented.
 */
634 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
635                     const struct iphdr *tnl_params, u8 protocol)
636 {
637         struct ip_tunnel *tunnel = netdev_priv(dev);
638         const struct iphdr *inner_iph;
639         struct flowi4 fl4;
640         u8     tos, ttl;
641         __be16 df;
642         struct rtable *rt;              /* Route to the other host */
643         unsigned int max_headroom;      /* The extra header space needed */
644         __be32 dst;
645         bool connected;
646 
647         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
            /* "connected" gates use of the tunnel's dst cache; it starts out
             * true only when the tunnel has a configured remote address.
             */
648         connected = (tunnel->parms.iph.daddr != 0);
649 
650         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
651 
652         dst = tnl_params->daddr;
653         if (dst == 0) {
654                 /* NBMA tunnel */
655 
656                 if (!skb_dst(skb)) {
657                         dev->stats.tx_fifo_errors++;
658                         goto tx_error;
659                 }
660 
                    /* Inner IPv4: the outer destination is the next hop of
                     * the inner packet's route.
                     */
661                 if (skb->protocol == htons(ETH_P_IP)) {
662                         rt = skb_rtable(skb);
663                         dst = rt_nexthop(rt, inner_iph->daddr);
664                 }
665 #if IS_ENABLED(CONFIG_IPV6)
666                 else if (skb->protocol == htons(ETH_P_IPV6)) {
667                         const struct in6_addr *addr6;
668                         struct neighbour *neigh;
669                         bool do_tx_error_icmp;
670                         int addr_type;
671 
672                         neigh = dst_neigh_lookup(skb_dst(skb),
673                                                  &ipv6_hdr(skb)->daddr);
674                         if (!neigh)
675                                 goto tx_error;
676 
677                         addr6 = (const struct in6_addr *)&neigh->primary_key;
678                         addr_type = ipv6_addr_type(addr6);
679 
                            /* Neighbour key is the unspecified address: use
                             * the packet's own destination instead.
                             */
680                         if (addr_type == IPV6_ADDR_ANY) {
681                                 addr6 = &ipv6_hdr(skb)->daddr;
682                                 addr_type = ipv6_addr_type(addr6);
683                         }
684 
                            /* Only an IPv4-compatible address yields an outer
                             * destination (its last 32 bits).
                             */
685                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
686                                 do_tx_error_icmp = true;
687                         else {
688                                 do_tx_error_icmp = false;
689                                 dst = addr6->s6_addr32[3];
690                         }
                            /* Release the neighbour before any error jump. */
691                         neigh_release(neigh);
692                         if (do_tx_error_icmp)
693                                 goto tx_error_icmp;
694                 }
695 #endif
696                 else
697                         goto tx_error;
698 
                    /* Destination is per packet: bypass the dst cache. */
699                 connected = false;
700         }
701 
702         tos = tnl_params->tos;
            /* Low bit set in the template TOS: inherit the TOS from the inner
             * header. The result then varies per packet, so the dst cache is
             * bypassed for IPv4 and IPv6 payloads.
             */
703         if (tos & 0x1) {
704                 tos &= ~0x1;
705                 if (skb->protocol == htons(ETH_P_IP)) {
706                         tos = inner_iph->tos;
707                         connected = false;
708                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
709                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
710                         connected = false;
711                 }
712         }
713 
714         init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
715                          tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
716                          tunnel->fwmark);
717 
718         if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
719                 goto tx_error;
720 
721         rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
722                          NULL;
723 
            /* Cache miss or cache bypassed: do a full route lookup, and store
             * the result only for connected tunnels.
             */
724         if (!rt) {
725                 rt = ip_route_output_key(tunnel->net, &fl4);
726 
727                 if (IS_ERR(rt)) {
728                         dev->stats.tx_carrier_errors++;
729                         goto tx_error;
730                 }
731                 if (connected)
732                         dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
733                                           fl4.saddr);
734         }
735 
            /* The route points back at the tunnel device itself. */
736         if (rt->dst.dev == dev) {
737                 ip_rt_put(rt);
738                 dev->stats.collisions++;
739                 goto tx_error;
740         }
741 
742         if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
743                 ip_rt_put(rt);
744                 goto tx_error;
745         }
746 
            /* While errors are pending and err_time is within
             * IPTUNNEL_ERR_TIMEO, report a link failure for this skb and
             * count one error down; the packet is still transmitted. Once
             * the window has passed, the counter is reset.
             */
747         if (tunnel->err_count > 0) {
748                 if (time_before(jiffies,
749                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
750                         tunnel->err_count--;
751 
752                         dst_link_failure(skb);
753                 } else
754                         tunnel->err_count = 0;
755         }
756 
757         tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
758         ttl = tnl_params->ttl;
            /* Template TTL of 0: inherit from the inner header, or use the
             * route's hop limit for other protocols.
             */
759         if (ttl == 0) {
760                 if (skb->protocol == htons(ETH_P_IP))
761                         ttl = inner_iph->ttl;
762 #if IS_ENABLED(CONFIG_IPV6)
763                 else if (skb->protocol == htons(ETH_P_IPV6))
764                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
765 #endif
766                 else
767                         ttl = ip4_dst_hoplimit(&rt->dst);
768         }
769 
            /* Outer DF: template value, plus the inner IPv4 DF bit unless the
             * tunnel is configured to ignore it.
             */
770         df = tnl_params->frag_off;
771         if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
772                 df |= (inner_iph->frag_off&htons(IP_DF));
773 
            /* needed_headroom only ever grows here; it is never lowered. */
774         max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
775                         + rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
776         if (max_headroom > dev->needed_headroom)
777                 dev->needed_headroom = max_headroom;
778 
779         if (skb_cow_head(skb, dev->needed_headroom)) {
780                 ip_rt_put(rt);
781                 dev->stats.tx_dropped++;
782                 kfree_skb(skb);
783                 return;
784         }
785 
786         iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
787                       df, !net_eq(tunnel->net, dev_net(dev)));
788         return;
789 
790 #if IS_ENABLED(CONFIG_IPV6)
791 tx_error_icmp:
            /* Report the failure on the skb, then fall through to tx_error. */
792         dst_link_failure(skb);
793 #endif
794 tx_error:
795         dev->stats.tx_errors++;
796         kfree_skb(skb);
797 }
798 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
799 
/*
 * Apply new parameters @p to the existing tunnel @t / device @dev.
 *
 * @itn:     per-netns tunnel table that holds @t
 * @t:       tunnel to update
 * @dev:     net_device of @t
 * @p:       new parameters
 * @set_mtu: when true, a rebind also updates dev->mtu
 * @fwmark:  new firewall mark
 *
 * The tunnel is removed from the hash table before its addresses and keys
 * change and re-added afterwards, since ip_bucket() derives the chain from
 * those fields. The dst cache is always reset and a netdev state-change
 * notification is issued.
 */
800 static void ip_tunnel_update(struct ip_tunnel_net *itn,
801                              struct ip_tunnel *t,
802                              struct net_device *dev,
803                              struct ip_tunnel_parm *p,
804                              bool set_mtu,
805                              __u32 fwmark)
806 {
807         ip_tunnel_del(itn, t);
808         t->parms.iph.saddr = p->iph.saddr;
809         t->parms.iph.daddr = p->iph.daddr;
810         t->parms.i_key = p->i_key;
811         t->parms.o_key = p->o_key;
            /* Non-Ethernet devices use the tunnel endpoints as the device
             * address and broadcast address (first 4 bytes).
             */
812         if (dev->type != ARPHRD_ETHER) {
813                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
814                 memcpy(dev->broadcast, &p->iph.daddr, 4);
815         }
816         ip_tunnel_add(itn, t);
817 
            /* These fields are updated after the tunnel is hashed again. */
818         t->parms.iph.ttl = p->iph.ttl;
819         t->parms.iph.tos = p->iph.tos;
820         t->parms.iph.frag_off = p->iph.frag_off;
821 
            /* Rebind only when link or fwmark changed; the computed MTU is
             * applied only if the caller asked for it.
             */
822         if (t->parms.link != p->link || t->fwmark != fwmark) {
823                 int mtu;
824 
825                 t->parms.link = p->link;
826                 t->fwmark = fwmark;
827                 mtu = ip_tunnel_bind_dev(dev);
828                 if (set_mtu)
829                         dev->mtu = mtu;
830         }
831         dst_cache_reset(&t->dst_cache);
832         netdev_state_change(dev);
833 }
834 
835 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
836 {
837         int err = 0;
838         struct ip_tunnel *t = netdev_priv(dev);
839         struct net *net = t->net;
840         struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);
841 
842         BUG_ON(!itn->fb_tunnel_dev);
843         switch (cmd) {
844         case SIOCGETTUNNEL:
845                 if (dev == itn->fb_tunnel_dev) {
846                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
847                         if (!t)
848                                 t = netdev_priv(dev);
849                 }
850                 memcpy(p, &t->parms, sizeof(*p));
851                 break;
852 
853         case SIOCADDTUNNEL:
854         case SIOCCHGTUNNEL:
855                 err = -EPERM;
856                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
857                         goto done;
858                 if (p->iph.ttl)
859                         p->iph.frag_off |= htons(IP_DF);
860                 if (!(p->i_flags & VTI_ISVTI)) {
861                         if (!(p->i_flags & TUNNEL_KEY))
862                                 p->i_key = 0;
863                         if (!(p->o_flags & TUNNEL_KEY))
864                                 p->o_key = 0;
865                 }
866 
867                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
868 
869                 if (cmd == SIOCADDTUNNEL) {
870                         if (!t) {
871                                 t = ip_tunnel_create(net, itn, p);
872                                 err = PTR_ERR_OR_ZERO(t);
873                                 break;
874                         }
875 
876                         err = -EEXIST;
877                         break;
878                 }
879                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
880                         if (t) {
881                                 if (t->dev != dev) {
882                                         err = -EEXIST;
883                                         break;
884                                 }
885                         } else {
886                                 unsigned int nflags = 0;
887 
888                                 if (ipv4_is_multicast(p->iph.daddr))
889                                         nflags = IFF_BROADCAST;
890                                 else if (p->iph.daddr)
891                                         nflags = IFF_POINTOPOINT;
892 
893                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
894                                         err = -EINVAL;
895                                         break;
896                                 }
897 
898                                 t = netdev_priv(dev);
899                         }
900                 }
901 
902                 if (t) {
903                         err = 0;
904                         ip_tunnel_update(itn, t, dev, p, true, 0);
905                 } else {
906                         err = -ENOENT;
907                 }
908                 break;
909 
910         case SIOCDELTUNNEL:
911                 err = -EPERM;
912                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
913                         goto done;
914 
915                 if (dev == itn->fb_tunnel_dev) {
916                         err = -ENOENT;
917                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
918                         if (!t)
919                                 goto done;
920                         err = -EPERM;
921                         if (t == netdev_priv(itn->fb_tunnel_dev))
922                                 goto done;
923                         dev = t->dev;
924                 }
925                 unregister_netdevice(dev);
926                 err = 0;
927                 break;
928 
929         default:
930                 err = -EINVAL;
931         }
932 
933 done:
934         return err;
935 }
936 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
937 
938 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
939 {
940         struct ip_tunnel *tunnel = netdev_priv(dev);
941         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
942         int max_mtu = 0xFFF8 - dev->hard_header_len - t_hlen;
943 
944         if (new_mtu < ETH_MIN_MTU)
945                 return -EINVAL;
946 
947         if (new_mtu > max_mtu) {
948                 if (strict)
949                         return -EINVAL;
950 
951                 new_mtu = max_mtu;
952         }
953 
954         dev->mtu = new_mtu;
955         return 0;
956 }
957 EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
958 
959 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
960 {
961         return __ip_tunnel_change_mtu(dev, new_mtu, true);
962 }
963 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
964 
965 static void ip_tunnel_dev_free(struct net_device *dev)
966 {
967         struct ip_tunnel *tunnel = netdev_priv(dev);
968 
969         gro_cells_destroy(&tunnel->gro_cells);
970         dst_cache_destroy(&tunnel->dst_cache);
971         free_percpu(dev->tstats);
972 }
973 
974 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
975 {
976         struct ip_tunnel *tunnel = netdev_priv(dev);
977         struct ip_tunnel_net *itn;
978 
979         itn = net_generic(tunnel->net, tunnel->ip_tnl_net_id);
980 
981         if (itn->fb_tunnel_dev != dev) {
982                 ip_tunnel_del(itn, netdev_priv(dev));
983                 unregister_netdevice_queue(dev, head);
984         }
985 }
986 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
987 
988 struct net *ip_tunnel_get_link_net(const struct net_device *dev)
989 {
990         struct ip_tunnel *tunnel = netdev_priv(dev);
991 
992         return tunnel->net;
993 }
994 EXPORT_SYMBOL(ip_tunnel_get_link_net);
995 
996 int ip_tunnel_get_iflink(const struct net_device *dev)
997 {
998         struct ip_tunnel *tunnel = netdev_priv(dev);
999 
1000         return tunnel->parms.link;
1001 }
1002 EXPORT_SYMBOL(ip_tunnel_get_iflink);
1003 
1004 int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
1005                                   struct rtnl_link_ops *ops, char *devname)
1006 {
1007         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
1008         struct ip_tunnel_parm parms;
1009         unsigned int i;
1010 
1011         for (i = 0; i < IP_TNL_HASH_SIZE; i++)
1012                 INIT_HLIST_HEAD(&itn->tunnels[i]);
1013 
1014         if (!ops) {
1015                 itn->fb_tunnel_dev = NULL;
1016                 return 0;
1017         }
1018 
1019         memset(&parms, 0, sizeof(parms));
1020         if (devname)
1021                 strlcpy(parms.name, devname, IFNAMSIZ);
1022 
1023         rtnl_lock();
1024         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
1025         /* FB netdevice is special: we have one, and only one per netns.
1026          * Allowing to move it to another netns is clearly unsafe.
1027          */
1028         if (!IS_ERR(itn->fb_tunnel_dev)) {
1029                 itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
1030                 itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
1031                 ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
1032         }
1033         rtnl_unlock();
1034 
1035         return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);
1036 }
1037 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
1038 
1039 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,
1040                               struct rtnl_link_ops *ops)
1041 {
1042         struct net *net = dev_net(itn->fb_tunnel_dev);
1043         struct net_device *dev, *aux;
1044         int h;
1045 
1046         for_each_netdev_safe(net, dev, aux)
1047                 if (dev->rtnl_link_ops == ops)
1048                         unregister_netdevice_queue(dev, head);
1049 
1050         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
1051                 struct ip_tunnel *t;
1052                 struct hlist_node *n;
1053                 struct hlist_head *thead = &itn->tunnels[h];
1054 
1055                 hlist_for_each_entry_safe(t, n, thead, hash_node)
1056                         /* If dev is in the same netns, it has already
1057                          * been added to the list by the previous loop.
1058                          */
1059                         if (!net_eq(dev_net(t->dev), net))
1060                                 unregister_netdevice_queue(t->dev, head);
1061         }
1062 }
1063 
1064 void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops)
1065 {
1066         LIST_HEAD(list);
1067 
1068         rtnl_lock();
1069         ip_tunnel_destroy(itn, &list, ops);
1070         unregister_netdevice_many(&list);
1071         rtnl_unlock();
1072 }
1073 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
1074 
1075 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
1076                       struct ip_tunnel_parm *p, __u32 fwmark)
1077 {
1078         struct ip_tunnel *nt;
1079         struct net *net = dev_net(dev);
1080         struct ip_tunnel_net *itn;
1081         int mtu;
1082         int err;
1083 
1084         nt = netdev_priv(dev);
1085         itn = net_generic(net, nt->ip_tnl_net_id);
1086 
1087         if (nt->collect_md) {
1088                 if (rtnl_dereference(itn->collect_md_tun))
1089                         return -EEXIST;
1090         } else {
1091                 if (ip_tunnel_find(itn, p, dev->type))
1092                         return -EEXIST;
1093         }
1094 
1095         nt->net = net;
1096         nt->parms = *p;
1097         nt->fwmark = fwmark;
1098         err = register_netdevice(dev);
1099         if (err)
1100                 goto out;
1101 
1102         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1103                 eth_hw_addr_random(dev);
1104 
1105         mtu = ip_tunnel_bind_dev(dev);
1106         if (!tb[IFLA_MTU])
1107                 dev->mtu = mtu;
1108 
1109         ip_tunnel_add(itn, nt);
1110 out:
1111         return err;
1112 }
1113 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
1114 
1115 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
1116                          struct ip_tunnel_parm *p, __u32 fwmark)
1117 {
1118         struct ip_tunnel *t;
1119         struct ip_tunnel *tunnel = netdev_priv(dev);
1120         struct net *net = tunnel->net;
1121         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
1122 
1123         if (dev == itn->fb_tunnel_dev)
1124                 return -EINVAL;
1125 
1126         t = ip_tunnel_find(itn, p, dev->type);
1127 
1128         if (t) {
1129                 if (t->dev != dev)
1130                         return -EEXIST;
1131         } else {
1132                 t = tunnel;
1133 
1134                 if (dev->type != ARPHRD_ETHER) {
1135                         unsigned int nflags = 0;
1136 
1137                         if (ipv4_is_multicast(p->iph.daddr))
1138                                 nflags = IFF_BROADCAST;
1139                         else if (p->iph.daddr)
1140                                 nflags = IFF_POINTOPOINT;
1141 
1142                         if ((dev->flags ^ nflags) &
1143                             (IFF_POINTOPOINT | IFF_BROADCAST))
1144                                 return -EINVAL;
1145                 }
1146         }
1147 
1148         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU], fwmark);
1149         return 0;
1150 }
1151 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
1152 
1153 int ip_tunnel_init(struct net_device *dev)
1154 {
1155         struct ip_tunnel *tunnel = netdev_priv(dev);
1156         struct iphdr *iph = &tunnel->parms.iph;
1157         int err;
1158 
1159         dev->needs_free_netdev = true;
1160         dev->priv_destructor = ip_tunnel_dev_free;
1161         dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
1162         if (!dev->tstats)
1163                 return -ENOMEM;
1164 
1165         err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
1166         if (err) {
1167                 free_percpu(dev->tstats);
1168                 return err;
1169         }
1170 
1171         err = gro_cells_init(&tunnel->gro_cells, dev);
1172         if (err) {
1173                 dst_cache_destroy(&tunnel->dst_cache);
1174                 free_percpu(dev->tstats);
1175                 return err;
1176         }
1177 
1178         tunnel->dev = dev;
1179         tunnel->net = dev_net(dev);
1180         strcpy(tunnel->parms.name, dev->name);
1181         iph->version            = 4;
1182         iph->ihl                = 5;
1183 
1184         if (tunnel->collect_md) {
1185                 dev->features |= NETIF_F_NETNS_LOCAL;
1186                 netif_keep_dst(dev);
1187         }
1188         return 0;
1189 }
1190 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1191 
1192 void ip_tunnel_uninit(struct net_device *dev)
1193 {
1194         struct ip_tunnel *tunnel = netdev_priv(dev);
1195         struct net *net = tunnel->net;
1196         struct ip_tunnel_net *itn;
1197 
1198         itn = net_generic(net, tunnel->ip_tnl_net_id);
1199         /* fb_tunnel_dev will be unregisted in net-exit call. */
1200         if (itn->fb_tunnel_dev != dev)
1201                 ip_tunnel_del(itn, netdev_priv(dev));
1202 
1203         dst_cache_reset(&tunnel->dst_cache);
1204 }
1205 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1206 
1207 /* Do least required initialization, rest of init is done in tunnel_init call */
1208 void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
1209 {
1210         struct ip_tunnel *tunnel = netdev_priv(dev);
1211         tunnel->ip_tnl_net_id = net_id;
1212 }
1213 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1214 
1215 MODULE_LICENSE("GPL");
1216 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp