~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ip_gre.c

Version: ~ [ linux-5.13-rc7 ] ~ [ linux-5.12.12 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.45 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.127 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.195 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.237 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.273 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.273 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      Linux NET3:     GRE over IP protocol decoder.
  3  *
  4  *      Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
  5  *
  6  *      This program is free software; you can redistribute it and/or
  7  *      modify it under the terms of the GNU General Public License
  8  *      as published by the Free Software Foundation; either version
  9  *      2 of the License, or (at your option) any later version.
 10  *
 11  */
 12 
 13 #include <linux/capability.h>
 14 #include <linux/module.h>
 15 #include <linux/types.h>
 16 #include <linux/kernel.h>
 17 #include <asm/uaccess.h>
 18 #include <linux/skbuff.h>
 19 #include <linux/netdevice.h>
 20 #include <linux/in.h>
 21 #include <linux/tcp.h>
 22 #include <linux/udp.h>
 23 #include <linux/if_arp.h>
 24 #include <linux/mroute.h>
 25 #include <linux/init.h>
 26 #include <linux/in6.h>
 27 #include <linux/inetdevice.h>
 28 #include <linux/igmp.h>
 29 #include <linux/netfilter_ipv4.h>
 30 #include <linux/etherdevice.h>
 31 #include <linux/if_ether.h>
 32 
 33 #include <net/sock.h>
 34 #include <net/ip.h>
 35 #include <net/icmp.h>
 36 #include <net/protocol.h>
 37 #include <net/ipip.h>
 38 #include <net/arp.h>
 39 #include <net/checksum.h>
 40 #include <net/dsfield.h>
 41 #include <net/inet_ecn.h>
 42 #include <net/xfrm.h>
 43 #include <net/net_namespace.h>
 44 #include <net/netns/generic.h>
 45 #include <net/rtnetlink.h>
 46 
 47 #ifdef CONFIG_IPV6
 48 #include <net/ipv6.h>
 49 #include <net/ip6_fib.h>
 50 #include <net/ip6_route.h>
 51 #endif
 52 
 53 /*
 54    Problems & solutions
 55    --------------------
 56 
 57    1. The most important issue is detecting local dead loops.
 58    They would cause complete host lockup in transmit, which
 59    would be "resolved" by stack overflow or, if queueing is enabled,
 60    with infinite looping in net_bh.
 61 
 62    We cannot track such dead loops during route installation,
 63    it is infeasible task. The most general solutions would be
 64    to keep skb->encapsulation counter (sort of local ttl),
 65    and silently drop packet when it expires. It is the best
  66    solution, but it supposes maintaining new variable in ALL
 67    skb, even if no tunneling is used.
 68 
 69    Current solution: HARD_TX_LOCK lock breaks dead loops.
 70 
 71 
 72 
 73    2. Networking dead loops would not kill routers, but would really
 74    kill network. IP hop limit plays role of "t->recursion" in this case,
 75    if we copy it from packet being encapsulated to upper header.
 76    It is very good solution, but it introduces two problems:
 77 
 78    - Routing protocols, using packets with ttl=1 (OSPF, RIP2),
 79      do not work over tunnels.
 80    - traceroute does not work. I planned to relay ICMP from tunnel,
 81      so that this problem would be solved and traceroute output
 82      would even more informative. This idea appeared to be wrong:
 83      only Linux complies to rfc1812 now (yes, guys, Linux is the only
 84      true router now :-)), all routers (at least, in neighbourhood of mine)
 85      return only 8 bytes of payload. It is the end.
 86 
 87    Hence, if we want that OSPF worked or traceroute said something reasonable,
 88    we should search for another solution.
 89 
 90    One of them is to parse packet trying to detect inner encapsulation
 91    made by our node. It is difficult or even impossible, especially,
  92    taking into account fragmentation. To be short, it is not a solution at all.
 93 
 94    Current solution: The solution was UNEXPECTEDLY SIMPLE.
 95    We force DF flag on tunnels with preconfigured hop limit,
 96    that is ALL. :-) Well, it does not remove the problem completely,
 97    but exponential growth of network traffic is changed to linear
 98    (branches, that exceed pmtu are pruned) and tunnel mtu
 99    fastly degrades to value <68, where looping stops.
100    Yes, it is not good if there exists a router in the loop,
101    which does not force DF, even when encapsulating packets have DF set.
102    But it is not our problem! Nobody could accuse us, we made
103    all that we could make. Even if it is your gated who injected
104    fatal route to network, even if it were you who configured
105    fatal static route: you are innocent. :-)
106 
107 
108 
109    3. Really, ipv4/ipip.c, ipv4/ip_gre.c and ipv6/sit.c contain
110    practically identical code. It would be good to glue them
111    together, but it is not very evident, how to make them modular.
112    sit is integral part of IPv6, ipip and gre are naturally modular.
113    We could extract common parts (hash table, ioctl etc)
114    to a separate module (ip_tunnel.c).
115 
116    Alexey Kuznetsov.
117  */
118 
119 static struct rtnl_link_ops ipgre_link_ops __read_mostly;
120 static int ipgre_tunnel_init(struct net_device *dev);
121 static void ipgre_tunnel_setup(struct net_device *dev);
122 static int ipgre_tunnel_bind_dev(struct net_device *dev);
123 
124 /* Fallback tunnel: no source, no destination, no key, no options */
125 
126 #define HASH_SIZE  16
127 
128 static int ipgre_net_id;
 129 struct ipgre_net {  /* GRE state fetched with net_generic(net, ipgre_net_id) */
 130         struct ip_tunnel *tunnels[4][HASH_SIZE];  /* hash chains; first index is the table (0..3), second the bucket */
 131 
 132         struct net_device *fb_tunnel_dev;  /* fallback device, returned by lookup when nothing else matches and it is IFF_UP */
 133 };
134 
135 /* Tunnel hash table */
136 
137 /*
138    4 hash tables:
139 
140    3: (remote,local)
141    2: (remote,*)
142    1: (*,local)
143    0: (*,*)
144 
145    We require exact key match i.e. if a key is present in packet
146    it will match only tunnel with the same key; if it is not present,
147    it will match only keyless tunnel.
148 
 149    All keyless packets, if not matched by configured keyless tunnels,
150    will match fallback tunnel.
151  */
152 
/*
 * Fold an address or key into a bucket index in [0, HASH_SIZE).
 *
 * The argument is fully parenthesised so that any expression (not only a
 * plain identifier) hashes correctly.  It is evaluated twice, so do not
 * pass expressions with side effects.  The mask is derived from HASH_SIZE,
 * which therefore has to be a power of two.
 */
#define HASH(addr) \
        ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & (HASH_SIZE - 1))
154 
155 #define tunnels_r_l     tunnels[3]
156 #define tunnels_r       tunnels[2]
157 #define tunnels_l       tunnels[1]
158 #define tunnels_wc      tunnels[0]
159 
160 static DEFINE_RWLOCK(ipgre_lock);
161 
162 /* Given src, dst and key, find appropriate for input tunnel. */
163 
164 static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev,
165                                               __be32 remote, __be32 local,
166                                               __be32 key, __be16 gre_proto)
167 {
168         struct net *net = dev_net(dev);
169         int link = dev->ifindex;
170         unsigned h0 = HASH(remote);
171         unsigned h1 = HASH(key);
172         struct ip_tunnel *t, *cand = NULL;
173         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
174         int dev_type = (gre_proto == htons(ETH_P_TEB)) ?
175                        ARPHRD_ETHER : ARPHRD_IPGRE;
176         int score, cand_score = 4;
177 
178         for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) {
179                 if (local != t->parms.iph.saddr ||
180                     remote != t->parms.iph.daddr ||
181                     key != t->parms.i_key ||
182                     !(t->dev->flags & IFF_UP))
183                         continue;
184 
185                 if (t->dev->type != ARPHRD_IPGRE &&
186                     t->dev->type != dev_type)
187                         continue;
188 
189                 score = 0;
190                 if (t->parms.link != link)
191                         score |= 1;
192                 if (t->dev->type != dev_type)
193                         score |= 2;
194                 if (score == 0)
195                         return t;
196 
197                 if (score < cand_score) {
198                         cand = t;
199                         cand_score = score;
200                 }
201         }
202 
203         for (t = ign->tunnels_r[h0^h1]; t; t = t->next) {
204                 if (remote != t->parms.iph.daddr ||
205                     key != t->parms.i_key ||
206                     !(t->dev->flags & IFF_UP))
207                         continue;
208 
209                 if (t->dev->type != ARPHRD_IPGRE &&
210                     t->dev->type != dev_type)
211                         continue;
212 
213                 score = 0;
214                 if (t->parms.link != link)
215                         score |= 1;
216                 if (t->dev->type != dev_type)
217                         score |= 2;
218                 if (score == 0)
219                         return t;
220 
221                 if (score < cand_score) {
222                         cand = t;
223                         cand_score = score;
224                 }
225         }
226 
227         for (t = ign->tunnels_l[h1]; t; t = t->next) {
228                 if ((local != t->parms.iph.saddr &&
229                      (local != t->parms.iph.daddr ||
230                       !ipv4_is_multicast(local))) ||
231                     key != t->parms.i_key ||
232                     !(t->dev->flags & IFF_UP))
233                         continue;
234 
235                 if (t->dev->type != ARPHRD_IPGRE &&
236                     t->dev->type != dev_type)
237                         continue;
238 
239                 score = 0;
240                 if (t->parms.link != link)
241                         score |= 1;
242                 if (t->dev->type != dev_type)
243                         score |= 2;
244                 if (score == 0)
245                         return t;
246 
247                 if (score < cand_score) {
248                         cand = t;
249                         cand_score = score;
250                 }
251         }
252 
253         for (t = ign->tunnels_wc[h1]; t; t = t->next) {
254                 if (t->parms.i_key != key ||
255                     !(t->dev->flags & IFF_UP))
256                         continue;
257 
258                 if (t->dev->type != ARPHRD_IPGRE &&
259                     t->dev->type != dev_type)
260                         continue;
261 
262                 score = 0;
263                 if (t->parms.link != link)
264                         score |= 1;
265                 if (t->dev->type != dev_type)
266                         score |= 2;
267                 if (score == 0)
268                         return t;
269 
270                 if (score < cand_score) {
271                         cand = t;
272                         cand_score = score;
273                 }
274         }
275 
276         if (cand != NULL)
277                 return cand;
278 
279         if (ign->fb_tunnel_dev->flags & IFF_UP)
280                 return netdev_priv(ign->fb_tunnel_dev);
281 
282         return NULL;
283 }
284 
285 static struct ip_tunnel **__ipgre_bucket(struct ipgre_net *ign,
286                 struct ip_tunnel_parm *parms)
287 {
288         __be32 remote = parms->iph.daddr;
289         __be32 local = parms->iph.saddr;
290         __be32 key = parms->i_key;
291         unsigned h = HASH(key);
292         int prio = 0;
293 
294         if (local)
295                 prio |= 1;
296         if (remote && !ipv4_is_multicast(remote)) {
297                 prio |= 2;
298                 h ^= HASH(remote);
299         }
300 
301         return &ign->tunnels[prio][h];
302 }
303 
304 static inline struct ip_tunnel **ipgre_bucket(struct ipgre_net *ign,
305                 struct ip_tunnel *t)
306 {
307         return __ipgre_bucket(ign, &t->parms);
308 }
309 
310 static void ipgre_tunnel_link(struct ipgre_net *ign, struct ip_tunnel *t)
311 {
312         struct ip_tunnel **tp = ipgre_bucket(ign, t);
313 
314         t->next = *tp;
315         write_lock_bh(&ipgre_lock);
316         *tp = t;
317         write_unlock_bh(&ipgre_lock);
318 }
319 
320 static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t)
321 {
322         struct ip_tunnel **tp;
323 
324         for (tp = ipgre_bucket(ign, t); *tp; tp = &(*tp)->next) {
325                 if (t == *tp) {
326                         write_lock_bh(&ipgre_lock);
327                         *tp = t->next;
328                         write_unlock_bh(&ipgre_lock);
329                         break;
330                 }
331         }
332 }
333 
334 static struct ip_tunnel *ipgre_tunnel_find(struct net *net,
335                                            struct ip_tunnel_parm *parms,
336                                            int type)
337 {
338         __be32 remote = parms->iph.daddr;
339         __be32 local = parms->iph.saddr;
340         __be32 key = parms->i_key;
341         int link = parms->link;
342         struct ip_tunnel *t, **tp;
343         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
344 
345         for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next)
346                 if (local == t->parms.iph.saddr &&
347                     remote == t->parms.iph.daddr &&
348                     key == t->parms.i_key &&
349                     link == t->parms.link &&
350                     type == t->dev->type)
351                         break;
352 
353         return t;
354 }
355 
356 static struct ip_tunnel * ipgre_tunnel_locate(struct net *net,
357                 struct ip_tunnel_parm *parms, int create)
358 {
359         struct ip_tunnel *t, *nt;
360         struct net_device *dev;
361         char name[IFNAMSIZ];
362         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
363 
364         t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE);
365         if (t || !create)
366                 return t;
367 
368         if (parms->name[0])
369                 strlcpy(name, parms->name, IFNAMSIZ);
370         else
371                 sprintf(name, "gre%%d");
372 
373         dev = alloc_netdev(sizeof(*t), name, ipgre_tunnel_setup);
374         if (!dev)
375           return NULL;
376 
377         dev_net_set(dev, net);
378 
379         if (strchr(name, '%')) {
380                 if (dev_alloc_name(dev, name) < 0)
381                         goto failed_free;
382         }
383 
384         nt = netdev_priv(dev);
385         nt->parms = *parms;
386         dev->rtnl_link_ops = &ipgre_link_ops;
387 
388         dev->mtu = ipgre_tunnel_bind_dev(dev);
389 
390         if (register_netdevice(dev) < 0)
391                 goto failed_free;
392 
393         dev_hold(dev);
394         ipgre_tunnel_link(ign, nt);
395         return nt;
396 
397 failed_free:
398         free_netdev(dev);
399         return NULL;
400 }
401 
402 static void ipgre_tunnel_uninit(struct net_device *dev)
403 {
404         struct net *net = dev_net(dev);
405         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
406 
407         ipgre_tunnel_unlink(ign, netdev_priv(dev));
408         dev_put(dev);
409 }
410 
411 
412 static void ipgre_err(struct sk_buff *skb, u32 info)
413 {
414 
415 /* All the routers (except for Linux) return only
416    8 bytes of packet payload. It means, that precise relaying of
417    ICMP in the real Internet is absolutely infeasible.
418 
419    Moreover, Cisco "wise men" put GRE key to the third word
420    in GRE header. It makes impossible maintaining even soft state for keyed
421    GRE tunnels with enabled checksum. Tell them "thank you".
422 
423    Well, I wonder, rfc1812 was written by Cisco employee,
424    what the hell these idiots break standrads established
425    by themself???
426  */
427 
428         struct iphdr *iph = (struct iphdr *)skb->data;
429         __be16       *p = (__be16*)(skb->data+(iph->ihl<<2));
430         int grehlen = (iph->ihl<<2) + 4;
431         const int type = icmp_hdr(skb)->type;
432         const int code = icmp_hdr(skb)->code;
433         struct ip_tunnel *t;
434         __be16 flags;
435 
436         flags = p[0];
437         if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
438                 if (flags&(GRE_VERSION|GRE_ROUTING))
439                         return;
440                 if (flags&GRE_KEY) {
441                         grehlen += 4;
442                         if (flags&GRE_CSUM)
443                                 grehlen += 4;
444                 }
445         }
446 
447         /* If only 8 bytes returned, keyed message will be dropped here */
448         if (skb_headlen(skb) < grehlen)
449                 return;
450 
451         switch (type) {
452         default:
453         case ICMP_PARAMETERPROB:
454                 return;
455 
456         case ICMP_DEST_UNREACH:
457                 switch (code) {
458                 case ICMP_SR_FAILED:
459                 case ICMP_PORT_UNREACH:
460                         /* Impossible event. */
461                         return;
462                 case ICMP_FRAG_NEEDED:
463                         /* Soft state for pmtu is maintained by IP core. */
464                         return;
465                 default:
466                         /* All others are translated to HOST_UNREACH.
467                            rfc2003 contains "deep thoughts" about NET_UNREACH,
468                            I believe they are just ether pollution. --ANK
469                          */
470                         break;
471                 }
472                 break;
473         case ICMP_TIME_EXCEEDED:
474                 if (code != ICMP_EXC_TTL)
475                         return;
476                 break;
477         }
478 
479         read_lock(&ipgre_lock);
480         t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr,
481                                 flags & GRE_KEY ?
482                                 *(((__be32 *)p) + (grehlen / 4) - 1) : 0,
483                                 p[1]);
484         if (t == NULL || t->parms.iph.daddr == 0 ||
485             ipv4_is_multicast(t->parms.iph.daddr))
486                 goto out;
487 
488         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
489                 goto out;
490 
491         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
492                 t->err_count++;
493         else
494                 t->err_count = 1;
495         t->err_time = jiffies;
496 out:
497         read_unlock(&ipgre_lock);
498         return;
499 }
500 
501 static inline void ipgre_ecn_decapsulate(struct iphdr *iph, struct sk_buff *skb)
502 {
503         if (INET_ECN_is_ce(iph->tos)) {
504                 if (skb->protocol == htons(ETH_P_IP)) {
505                         IP_ECN_set_ce(ip_hdr(skb));
506                 } else if (skb->protocol == htons(ETH_P_IPV6)) {
507                         IP6_ECN_set_ce(ipv6_hdr(skb));
508                 }
509         }
510 }
511 
512 static inline u8
513 ipgre_ecn_encapsulate(u8 tos, struct iphdr *old_iph, struct sk_buff *skb)
514 {
515         u8 inner = 0;
516         if (skb->protocol == htons(ETH_P_IP))
517                 inner = old_iph->tos;
518         else if (skb->protocol == htons(ETH_P_IPV6))
519                 inner = ipv6_get_dsfield((struct ipv6hdr *)old_iph);
520         return INET_ECN_encapsulate(tos, inner);
521 }
522 
523 static int ipgre_rcv(struct sk_buff *skb)
524 {
525         struct iphdr *iph;
526         u8     *h;
527         __be16    flags;
528         __sum16   csum = 0;
529         __be32 key = 0;
530         u32    seqno = 0;
531         struct ip_tunnel *tunnel;
532         int    offset = 4;
533         __be16 gre_proto;
534         unsigned int len;
535 
536         if (!pskb_may_pull(skb, 16))
537                 goto drop_nolock;
538 
539         iph = ip_hdr(skb);
540         h = skb->data;
541         flags = *(__be16*)h;
542 
543         if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) {
544                 /* - Version must be 0.
545                    - We do not support routing headers.
546                  */
547                 if (flags&(GRE_VERSION|GRE_ROUTING))
548                         goto drop_nolock;
549 
550                 if (flags&GRE_CSUM) {
551                         switch (skb->ip_summed) {
552                         case CHECKSUM_COMPLETE:
553                                 csum = csum_fold(skb->csum);
554                                 if (!csum)
555                                         break;
556                                 /* fall through */
557                         case CHECKSUM_NONE:
558                                 skb->csum = 0;
559                                 csum = __skb_checksum_complete(skb);
560                                 skb->ip_summed = CHECKSUM_COMPLETE;
561                         }
562                         offset += 4;
563                 }
564                 if (flags&GRE_KEY) {
565                         key = *(__be32*)(h + offset);
566                         offset += 4;
567                 }
568                 if (flags&GRE_SEQ) {
569                         seqno = ntohl(*(__be32*)(h + offset));
570                         offset += 4;
571                 }
572         }
573 
574         gre_proto = *(__be16 *)(h + 2);
575 
576         read_lock(&ipgre_lock);
577         if ((tunnel = ipgre_tunnel_lookup(skb->dev,
578                                           iph->saddr, iph->daddr, key,
579                                           gre_proto))) {
580                 struct net_device_stats *stats = &tunnel->dev->stats;
581 
582                 secpath_reset(skb);
583 
584                 skb->protocol = gre_proto;
585                 /* WCCP version 1 and 2 protocol decoding.
586                  * - Change protocol to IP
587                  * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header
588                  */
589                 if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) {
590                         skb->protocol = htons(ETH_P_IP);
591                         if ((*(h + offset) & 0xF0) != 0x40)
592                                 offset += 4;
593                 }
594 
595                 skb->mac_header = skb->network_header;
596                 __pskb_pull(skb, offset);
597                 skb_postpull_rcsum(skb, skb_transport_header(skb), offset);
598                 skb->pkt_type = PACKET_HOST;
599 #ifdef CONFIG_NET_IPGRE_BROADCAST
600                 if (ipv4_is_multicast(iph->daddr)) {
601                         /* Looped back packet, drop it! */
602                         if (skb_rtable(skb)->fl.iif == 0)
603                                 goto drop;
604                         stats->multicast++;
605                         skb->pkt_type = PACKET_BROADCAST;
606                 }
607 #endif
608 
609                 if (((flags&GRE_CSUM) && csum) ||
610                     (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) {
611                         stats->rx_crc_errors++;
612                         stats->rx_errors++;
613                         goto drop;
614                 }
615                 if (tunnel->parms.i_flags&GRE_SEQ) {
616                         if (!(flags&GRE_SEQ) ||
617                             (tunnel->i_seqno && (s32)(seqno - tunnel->i_seqno) < 0)) {
618                                 stats->rx_fifo_errors++;
619                                 stats->rx_errors++;
620                                 goto drop;
621                         }
622                         tunnel->i_seqno = seqno + 1;
623                 }
624 
625                 len = skb->len;
626 
627                 /* Warning: All skb pointers will be invalidated! */
628                 if (tunnel->dev->type == ARPHRD_ETHER) {
629                         if (!pskb_may_pull(skb, ETH_HLEN)) {
630                                 stats->rx_length_errors++;
631                                 stats->rx_errors++;
632                                 goto drop;
633                         }
634 
635                         iph = ip_hdr(skb);
636                         skb->protocol = eth_type_trans(skb, tunnel->dev);
637                         skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
638                 }
639 
640                 stats->rx_packets++;
641                 stats->rx_bytes += len;
642                 skb->dev = tunnel->dev;
643                 skb_dst_drop(skb);
644                 nf_reset(skb);
645 
646                 skb_reset_network_header(skb);
647                 ipgre_ecn_decapsulate(iph, skb);
648 
649                 netif_rx(skb);
650                 read_unlock(&ipgre_lock);
651                 return(0);
652         }
653         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
654 
655 drop:
656         read_unlock(&ipgre_lock);
657 drop_nolock:
658         kfree_skb(skb);
659         return(0);
660 }
661 
662 static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
663 {
664         struct ip_tunnel *tunnel = netdev_priv(dev);
665         struct net_device_stats *stats = &tunnel->dev->stats;
666         struct iphdr  *old_iph = ip_hdr(skb);
667         struct iphdr  *tiph;
668         u8     tos;
669         __be16 df;
670         struct rtable *rt;                      /* Route to the other host */
671         struct net_device *tdev;                        /* Device to other host */
672         struct iphdr  *iph;                     /* Our new IP header */
673         unsigned int max_headroom;              /* The extra header space needed */
674         int    gre_hlen;
675         __be32 dst;
676         int    mtu;
677 
678         if (dev->type == ARPHRD_ETHER)
679                 IPCB(skb)->flags = 0;
680 
681         if (dev->header_ops && dev->type == ARPHRD_IPGRE) {
682                 gre_hlen = 0;
683                 tiph = (struct iphdr *)skb->data;
684         } else {
685                 gre_hlen = tunnel->hlen;
686                 tiph = &tunnel->parms.iph;
687         }
688 
689         if ((dst = tiph->daddr) == 0) {
690                 /* NBMA tunnel */
691 
692                 if (skb_dst(skb) == NULL) {
693                         stats->tx_fifo_errors++;
694                         goto tx_error;
695                 }
696 
697                 if (skb->protocol == htons(ETH_P_IP)) {
698                         rt = skb_rtable(skb);
699                         if ((dst = rt->rt_gateway) == 0)
700                                 goto tx_error_icmp;
701                 }
702 #ifdef CONFIG_IPV6
703                 else if (skb->protocol == htons(ETH_P_IPV6)) {
704                         struct in6_addr *addr6;
705                         int addr_type;
706                         struct neighbour *neigh = skb_dst(skb)->neighbour;
707 
708                         if (neigh == NULL)
709                                 goto tx_error;
710 
711                         addr6 = (struct in6_addr *)&neigh->primary_key;
712                         addr_type = ipv6_addr_type(addr6);
713 
714                         if (addr_type == IPV6_ADDR_ANY) {
715                                 addr6 = &ipv6_hdr(skb)->daddr;
716                                 addr_type = ipv6_addr_type(addr6);
717                         }
718 
719                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
720                                 goto tx_error_icmp;
721 
722                         dst = addr6->s6_addr32[3];
723                 }
724 #endif
725                 else
726                         goto tx_error;
727         }
728 
729         tos = tiph->tos;
730         if (tos == 1) {
731                 tos = 0;
732                 if (skb->protocol == htons(ETH_P_IP))
733                         tos = old_iph->tos;
734         }
735 
736         {
737                 struct flowi fl = { .oif = tunnel->parms.link,
738                                     .nl_u = { .ip4_u =
739                                               { .daddr = dst,
740                                                 .saddr = tiph->saddr,
741                                                 .tos = RT_TOS(tos) } },
742                                     .proto = IPPROTO_GRE };
743                 if (ip_route_output_key(dev_net(dev), &rt, &fl)) {
744                         stats->tx_carrier_errors++;
745                         goto tx_error;
746                 }
747         }
748         tdev = rt->u.dst.dev;
749 
750         if (tdev == dev) {
751                 ip_rt_put(rt);
752                 stats->collisions++;
753                 goto tx_error;
754         }
755 
756         df = tiph->frag_off;
757         if (df)
758                 mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen;
759         else
760                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
761 
762         if (skb_dst(skb))
763                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
764 
765         if (skb->protocol == htons(ETH_P_IP)) {
766                 df |= (old_iph->frag_off&htons(IP_DF));
767 
768                 if ((old_iph->frag_off&htons(IP_DF)) &&
769                     mtu < ntohs(old_iph->tot_len)) {
770                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
771                         ip_rt_put(rt);
772                         goto tx_error;
773                 }
774         }
775 #ifdef CONFIG_IPV6
776         else if (skb->protocol == htons(ETH_P_IPV6)) {
777                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
778 
779                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) && mtu >= IPV6_MIN_MTU) {
780                         if ((tunnel->parms.iph.daddr &&
781                              !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
782                             rt6->rt6i_dst.plen == 128) {
783                                 rt6->rt6i_flags |= RTF_MODIFIED;
784                                 skb_dst(skb)->metrics[RTAX_MTU-1] = mtu;
785                         }
786                 }
787 
788                 if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) {
789                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
790                         ip_rt_put(rt);
791                         goto tx_error;
792                 }
793         }
794 #endif
795 
796         if (tunnel->err_count > 0) {
797                 if (time_before(jiffies,
798                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
799                         tunnel->err_count--;
800 
801                         dst_link_failure(skb);
802                 } else
803                         tunnel->err_count = 0;
804         }
805 
806         max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
807 
808         if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
809             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
810                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
811                 if (!new_skb) {
812                         ip_rt_put(rt);
813                         stats->tx_dropped++;
814                         dev_kfree_skb(skb);
815                         return NETDEV_TX_OK;
816                 }
817                 if (skb->sk)
818                         skb_set_owner_w(new_skb, skb->sk);
819                 dev_kfree_skb(skb);
820                 skb = new_skb;
821                 old_iph = ip_hdr(skb);
822         }
823 
824         skb_reset_transport_header(skb);
825         skb_push(skb, gre_hlen);
826         skb_reset_network_header(skb);
827         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
828         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
829                               IPSKB_REROUTED);
830         skb_dst_drop(skb);
831         skb_dst_set(skb, &rt->u.dst);
832 
833         /*
834          *      Push down and install the IPIP header.
835          */
836 
837         iph                     =       ip_hdr(skb);
838         iph->version            =       4;
839         iph->ihl                =       sizeof(struct iphdr) >> 2;
840         iph->frag_off           =       df;
841         iph->protocol           =       IPPROTO_GRE;
842         iph->tos                =       ipgre_ecn_encapsulate(tos, old_iph, skb);
843         iph->daddr              =       rt->rt_dst;
844         iph->saddr              =       rt->rt_src;
845 
846         if ((iph->ttl = tiph->ttl) == 0) {
847                 if (skb->protocol == htons(ETH_P_IP))
848                         iph->ttl = old_iph->ttl;
849 #ifdef CONFIG_IPV6
850                 else if (skb->protocol == htons(ETH_P_IPV6))
851                         iph->ttl = ((struct ipv6hdr *)old_iph)->hop_limit;
852 #endif
853                 else
854                         iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
855         }
856 
857         ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags;
858         ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ?
859                                    htons(ETH_P_TEB) : skb->protocol;
860 
861         if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) {
862                 __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4);
863 
864                 if (tunnel->parms.o_flags&GRE_SEQ) {
865                         ++tunnel->o_seqno;
866                         *ptr = htonl(tunnel->o_seqno);
867                         ptr--;
868                 }
869                 if (tunnel->parms.o_flags&GRE_KEY) {
870                         *ptr = tunnel->parms.o_key;
871                         ptr--;
872                 }
873                 if (tunnel->parms.o_flags&GRE_CSUM) {
874                         *ptr = 0;
875                         *(__sum16*)ptr = ip_compute_csum((void*)(iph+1), skb->len - sizeof(struct iphdr));
876                 }
877         }
878 
879         nf_reset(skb);
880 
881         IPTUNNEL_XMIT();
882         return NETDEV_TX_OK;
883 
884 tx_error_icmp:
885         dst_link_failure(skb);
886 
887 tx_error:
888         stats->tx_errors++;
889         dev_kfree_skb(skb);
890         return NETDEV_TX_OK;
891 }
892 
893 static int ipgre_tunnel_bind_dev(struct net_device *dev)
894 {
895         struct net_device *tdev = NULL;
896         struct ip_tunnel *tunnel;
897         struct iphdr *iph;
898         int hlen = LL_MAX_HEADER;
899         int mtu = ETH_DATA_LEN;
900         int addend = sizeof(struct iphdr) + 4;
901 
902         tunnel = netdev_priv(dev);
903         iph = &tunnel->parms.iph;
904 
905         /* Guess output device to choose reasonable mtu and needed_headroom */
906 
907         if (iph->daddr) {
908                 struct flowi fl = { .oif = tunnel->parms.link,
909                                     .nl_u = { .ip4_u =
910                                               { .daddr = iph->daddr,
911                                                 .saddr = iph->saddr,
912                                                 .tos = RT_TOS(iph->tos) } },
913                                     .proto = IPPROTO_GRE };
914                 struct rtable *rt;
915                 if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
916                         tdev = rt->u.dst.dev;
917                         ip_rt_put(rt);
918                 }
919 
920                 if (dev->type != ARPHRD_ETHER)
921                         dev->flags |= IFF_POINTOPOINT;
922         }
923 
924         if (!tdev && tunnel->parms.link)
925                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
926 
927         if (tdev) {
928                 hlen = tdev->hard_header_len + tdev->needed_headroom;
929                 mtu = tdev->mtu;
930         }
931         dev->iflink = tunnel->parms.link;
932 
933         /* Precalculate GRE options length */
934         if (tunnel->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) {
935                 if (tunnel->parms.o_flags&GRE_CSUM)
936                         addend += 4;
937                 if (tunnel->parms.o_flags&GRE_KEY)
938                         addend += 4;
939                 if (tunnel->parms.o_flags&GRE_SEQ)
940                         addend += 4;
941         }
942         dev->needed_headroom = addend + hlen;
943         mtu -= dev->hard_header_len + addend;
944 
945         if (mtu < 68)
946                 mtu = 68;
947 
948         tunnel->hlen = addend;
949 
950         return mtu;
951 }
952 
953 static int
954 ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
955 {
956         int err = 0;
957         struct ip_tunnel_parm p;
958         struct ip_tunnel *t;
959         struct net *net = dev_net(dev);
960         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
961 
962         switch (cmd) {
963         case SIOCGETTUNNEL:
964                 t = NULL;
965                 if (dev == ign->fb_tunnel_dev) {
966                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
967                                 err = -EFAULT;
968                                 break;
969                         }
970                         t = ipgre_tunnel_locate(net, &p, 0);
971                 }
972                 if (t == NULL)
973                         t = netdev_priv(dev);
974                 memcpy(&p, &t->parms, sizeof(p));
975                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
976                         err = -EFAULT;
977                 break;
978 
979         case SIOCADDTUNNEL:
980         case SIOCCHGTUNNEL:
981                 err = -EPERM;
982                 if (!capable(CAP_NET_ADMIN))
983                         goto done;
984 
985                 err = -EFAULT;
986                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
987                         goto done;
988 
989                 err = -EINVAL;
990                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
991                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
992                     ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
993                         goto done;
994                 if (p.iph.ttl)
995                         p.iph.frag_off |= htons(IP_DF);
996 
997                 if (!(p.i_flags&GRE_KEY))
998                         p.i_key = 0;
999                 if (!(p.o_flags&GRE_KEY))
1000                         p.o_key = 0;
1001 
1002                 t = ipgre_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
1003 
1004                 if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
1005                         if (t != NULL) {
1006                                 if (t->dev != dev) {
1007                                         err = -EEXIST;
1008                                         break;
1009                                 }
1010                         } else {
1011                                 unsigned nflags = 0;
1012 
1013                                 t = netdev_priv(dev);
1014 
1015                                 if (ipv4_is_multicast(p.iph.daddr))
1016                                         nflags = IFF_BROADCAST;
1017                                 else if (p.iph.daddr)
1018                                         nflags = IFF_POINTOPOINT;
1019 
1020                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
1021                                         err = -EINVAL;
1022                                         break;
1023                                 }
1024                                 ipgre_tunnel_unlink(ign, t);
1025                                 t->parms.iph.saddr = p.iph.saddr;
1026                                 t->parms.iph.daddr = p.iph.daddr;
1027                                 t->parms.i_key = p.i_key;
1028                                 t->parms.o_key = p.o_key;
1029                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
1030                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
1031                                 ipgre_tunnel_link(ign, t);
1032                                 netdev_state_change(dev);
1033                         }
1034                 }
1035 
1036                 if (t) {
1037                         err = 0;
1038                         if (cmd == SIOCCHGTUNNEL) {
1039                                 t->parms.iph.ttl = p.iph.ttl;
1040                                 t->parms.iph.tos = p.iph.tos;
1041                                 t->parms.iph.frag_off = p.iph.frag_off;
1042                                 if (t->parms.link != p.link) {
1043                                         t->parms.link = p.link;
1044                                         dev->mtu = ipgre_tunnel_bind_dev(dev);
1045                                         netdev_state_change(dev);
1046                                 }
1047                         }
1048                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
1049                                 err = -EFAULT;
1050                 } else
1051                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
1052                 break;
1053 
1054         case SIOCDELTUNNEL:
1055                 err = -EPERM;
1056                 if (!capable(CAP_NET_ADMIN))
1057                         goto done;
1058 
1059                 if (dev == ign->fb_tunnel_dev) {
1060                         err = -EFAULT;
1061                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
1062                                 goto done;
1063                         err = -ENOENT;
1064                         if ((t = ipgre_tunnel_locate(net, &p, 0)) == NULL)
1065                                 goto done;
1066                         err = -EPERM;
1067                         if (t == netdev_priv(ign->fb_tunnel_dev))
1068                                 goto done;
1069                         dev = t->dev;
1070                 }
1071                 unregister_netdevice(dev);
1072                 err = 0;
1073                 break;
1074 
1075         default:
1076                 err = -EINVAL;
1077         }
1078 
1079 done:
1080         return err;
1081 }
1082 
1083 static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu)
1084 {
1085         struct ip_tunnel *tunnel = netdev_priv(dev);
1086         if (new_mtu < 68 ||
1087             new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen)
1088                 return -EINVAL;
1089         dev->mtu = new_mtu;
1090         return 0;
1091 }
1092 
1093 /* Nice toy. Unfortunately, useless in real life :-)
1094    It allows one to construct a virtual multiprotocol broadcast "LAN"
1095    over the Internet, provided multicast routing is set up.
1096 
1097 
1098    I have no idea whether this bicycle was invented before me,
1099    so I had to set ARPHRD_IPGRE to a random value.
1100    I have the impression that Cisco could do something similar,
1101    but this feature is apparently missing in IOS<=11.2(8).
1102 
1103    I set up 10.66.66/24 and fec0:6666:6666::0/96 as virtual networks
1104    with broadcast 224.66.66.66. If you have access to mbone, play with me :-)
1105 
1106    ping -t 255 224.66.66.66
1107 
1108    If nobody answers, mbone does not work.
1109 
1110    ip tunnel add Universe mode gre remote 224.66.66.66 local <Your_real_addr> ttl 255
1111    ip addr add 10.66.66.<somewhat>/24 dev Universe
1112    ifconfig Universe up
1113    ifconfig Universe add fe80::<Your_real_addr>/10
1114    ifconfig Universe add fec0:6666:6666::<Your_real_addr>/96
1115    ftp 10.66.66.66
1116    ...
1117    ftp fec0:6666:6666::193.233.7.65
1118    ...
1119 
1120  */
1121 
1122 static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
1123                         unsigned short type,
1124                         const void *daddr, const void *saddr, unsigned len)
1125 {
1126         struct ip_tunnel *t = netdev_priv(dev);
1127         struct iphdr *iph = (struct iphdr *)skb_push(skb, t->hlen);
1128         __be16 *p = (__be16*)(iph+1);
1129 
1130         memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
1131         p[0]            = t->parms.o_flags;
1132         p[1]            = htons(type);
1133 
1134         /*
1135          *      Set the source hardware address.
1136          */
1137 
1138         if (saddr)
1139                 memcpy(&iph->saddr, saddr, 4);
1140 
1141         if (daddr) {
1142                 memcpy(&iph->daddr, daddr, 4);
1143                 return t->hlen;
1144         }
1145         if (iph->daddr && !ipv4_is_multicast(iph->daddr))
1146                 return t->hlen;
1147 
1148         return -t->hlen;
1149 }
1150 
1151 static int ipgre_header_parse(const struct sk_buff *skb, unsigned char *haddr)
1152 {
1153         struct iphdr *iph = (struct iphdr *) skb_mac_header(skb);
1154         memcpy(haddr, &iph->saddr, 4);
1155         return 4;
1156 }
1157 
1158 static const struct header_ops ipgre_header_ops = {
1159         .create = ipgre_header,
1160         .parse  = ipgre_header_parse,
1161 };
1162 
1163 #ifdef CONFIG_NET_IPGRE_BROADCAST
1164 static int ipgre_open(struct net_device *dev)
1165 {
1166         struct ip_tunnel *t = netdev_priv(dev);
1167 
1168         if (ipv4_is_multicast(t->parms.iph.daddr)) {
1169                 struct flowi fl = { .oif = t->parms.link,
1170                                     .nl_u = { .ip4_u =
1171                                               { .daddr = t->parms.iph.daddr,
1172                                                 .saddr = t->parms.iph.saddr,
1173                                                 .tos = RT_TOS(t->parms.iph.tos) } },
1174                                     .proto = IPPROTO_GRE };
1175                 struct rtable *rt;
1176                 if (ip_route_output_key(dev_net(dev), &rt, &fl))
1177                         return -EADDRNOTAVAIL;
1178                 dev = rt->u.dst.dev;
1179                 ip_rt_put(rt);
1180                 if (__in_dev_get_rtnl(dev) == NULL)
1181                         return -EADDRNOTAVAIL;
1182                 t->mlink = dev->ifindex;
1183                 ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
1184         }
1185         return 0;
1186 }
1187 
1188 static int ipgre_close(struct net_device *dev)
1189 {
1190         struct ip_tunnel *t = netdev_priv(dev);
1191 
1192         if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
1193                 struct in_device *in_dev;
1194                 in_dev = inetdev_by_index(dev_net(dev), t->mlink);
1195                 if (in_dev) {
1196                         ip_mc_dec_group(in_dev, t->parms.iph.daddr);
1197                         in_dev_put(in_dev);
1198                 }
1199         }
1200         return 0;
1201 }
1202 
1203 #endif
1204 
1205 static const struct net_device_ops ipgre_netdev_ops = {
1206         .ndo_init               = ipgre_tunnel_init,
1207         .ndo_uninit             = ipgre_tunnel_uninit,
1208 #ifdef CONFIG_NET_IPGRE_BROADCAST
1209         .ndo_open               = ipgre_open,
1210         .ndo_stop               = ipgre_close,
1211 #endif
1212         .ndo_start_xmit         = ipgre_tunnel_xmit,
1213         .ndo_do_ioctl           = ipgre_tunnel_ioctl,
1214         .ndo_change_mtu         = ipgre_tunnel_change_mtu,
1215 };
1216 
1217 static void ipgre_tunnel_setup(struct net_device *dev)
1218 {
1219         dev->netdev_ops         = &ipgre_netdev_ops;
1220         dev->destructor         = free_netdev;
1221 
1222         dev->type               = ARPHRD_IPGRE;
1223         dev->needed_headroom    = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
1224         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
1225         dev->flags              = IFF_NOARP;
1226         dev->iflink             = 0;
1227         dev->addr_len           = 4;
1228         dev->features           |= NETIF_F_NETNS_LOCAL;
1229         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
1230 }
1231 
1232 static int ipgre_tunnel_init(struct net_device *dev)
1233 {
1234         struct ip_tunnel *tunnel;
1235         struct iphdr *iph;
1236 
1237         tunnel = netdev_priv(dev);
1238         iph = &tunnel->parms.iph;
1239 
1240         tunnel->dev = dev;
1241         strcpy(tunnel->parms.name, dev->name);
1242 
1243         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
1244         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
1245 
1246         if (iph->daddr) {
1247 #ifdef CONFIG_NET_IPGRE_BROADCAST
1248                 if (ipv4_is_multicast(iph->daddr)) {
1249                         if (!iph->saddr)
1250                                 return -EINVAL;
1251                         dev->flags = IFF_BROADCAST;
1252                         dev->header_ops = &ipgre_header_ops;
1253                 }
1254 #endif
1255         } else
1256                 dev->header_ops = &ipgre_header_ops;
1257 
1258         return 0;
1259 }
1260 
1261 static void ipgre_fb_tunnel_init(struct net_device *dev)
1262 {
1263         struct ip_tunnel *tunnel = netdev_priv(dev);
1264         struct iphdr *iph = &tunnel->parms.iph;
1265         struct ipgre_net *ign = net_generic(dev_net(dev), ipgre_net_id);
1266 
1267         tunnel->dev = dev;
1268         strcpy(tunnel->parms.name, dev->name);
1269 
1270         iph->version            = 4;
1271         iph->protocol           = IPPROTO_GRE;
1272         iph->ihl                = 5;
1273         tunnel->hlen            = sizeof(struct iphdr) + 4;
1274 
1275         dev_hold(dev);
1276         ign->tunnels_wc[0]      = tunnel;
1277 }
1278 
1279 
1280 static const struct net_protocol ipgre_protocol = {
1281         .handler        =       ipgre_rcv,
1282         .err_handler    =       ipgre_err,
1283         .netns_ok       =       1,
1284 };
1285 
1286 static void ipgre_destroy_tunnels(struct ipgre_net *ign)
1287 {
1288         int prio;
1289 
1290         for (prio = 0; prio < 4; prio++) {
1291                 int h;
1292                 for (h = 0; h < HASH_SIZE; h++) {
1293                         struct ip_tunnel *t;
1294                         while ((t = ign->tunnels[prio][h]) != NULL)
1295                                 unregister_netdevice(t->dev);
1296                 }
1297         }
1298 }
1299 
1300 static int ipgre_init_net(struct net *net)
1301 {
1302         int err;
1303         struct ipgre_net *ign;
1304 
1305         err = -ENOMEM;
1306         ign = kzalloc(sizeof(struct ipgre_net), GFP_KERNEL);
1307         if (ign == NULL)
1308                 goto err_alloc;
1309 
1310         err = net_assign_generic(net, ipgre_net_id, ign);
1311         if (err < 0)
1312                 goto err_assign;
1313 
1314         ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
1315                                            ipgre_tunnel_setup);
1316         if (!ign->fb_tunnel_dev) {
1317                 err = -ENOMEM;
1318                 goto err_alloc_dev;
1319         }
1320         dev_net_set(ign->fb_tunnel_dev, net);
1321 
1322         ipgre_fb_tunnel_init(ign->fb_tunnel_dev);
1323         ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops;
1324 
1325         if ((err = register_netdev(ign->fb_tunnel_dev)))
1326                 goto err_reg_dev;
1327 
1328         return 0;
1329 
1330 err_reg_dev:
1331         free_netdev(ign->fb_tunnel_dev);
1332 err_alloc_dev:
1333         /* nothing */
1334 err_assign:
1335         kfree(ign);
1336 err_alloc:
1337         return err;
1338 }
1339 
1340 static void ipgre_exit_net(struct net *net)
1341 {
1342         struct ipgre_net *ign;
1343 
1344         ign = net_generic(net, ipgre_net_id);
1345         rtnl_lock();
1346         ipgre_destroy_tunnels(ign);
1347         rtnl_unlock();
1348         kfree(ign);
1349 }
1350 
1351 static struct pernet_operations ipgre_net_ops = {
1352         .init = ipgre_init_net,
1353         .exit = ipgre_exit_net,
1354 };
1355 
1356 static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
1357 {
1358         __be16 flags;
1359 
1360         if (!data)
1361                 return 0;
1362 
1363         flags = 0;
1364         if (data[IFLA_GRE_IFLAGS])
1365                 flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
1366         if (data[IFLA_GRE_OFLAGS])
1367                 flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
1368         if (flags & (GRE_VERSION|GRE_ROUTING))
1369                 return -EINVAL;
1370 
1371         return 0;
1372 }
1373 
1374 static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
1375 {
1376         __be32 daddr;
1377 
1378         if (tb[IFLA_ADDRESS]) {
1379                 if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
1380                         return -EINVAL;
1381                 if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
1382                         return -EADDRNOTAVAIL;
1383         }
1384 
1385         if (!data)
1386                 goto out;
1387 
1388         if (data[IFLA_GRE_REMOTE]) {
1389                 memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4);
1390                 if (!daddr)
1391                         return -EINVAL;
1392         }
1393 
1394 out:
1395         return ipgre_tunnel_validate(tb, data);
1396 }
1397 
1398 static void ipgre_netlink_parms(struct nlattr *data[],
1399                                 struct ip_tunnel_parm *parms)
1400 {
1401         memset(parms, 0, sizeof(*parms));
1402 
1403         parms->iph.protocol = IPPROTO_GRE;
1404 
1405         if (!data)
1406                 return;
1407 
1408         if (data[IFLA_GRE_LINK])
1409                 parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
1410 
1411         if (data[IFLA_GRE_IFLAGS])
1412                 parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]);
1413 
1414         if (data[IFLA_GRE_OFLAGS])
1415                 parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]);
1416 
1417         if (data[IFLA_GRE_IKEY])
1418                 parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
1419 
1420         if (data[IFLA_GRE_OKEY])
1421                 parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
1422 
1423         if (data[IFLA_GRE_LOCAL])
1424                 parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
1425 
1426         if (data[IFLA_GRE_REMOTE])
1427                 parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
1428 
1429         if (data[IFLA_GRE_TTL])
1430                 parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
1431 
1432         if (data[IFLA_GRE_TOS])
1433                 parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
1434 
1435         if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
1436                 parms->iph.frag_off = htons(IP_DF);
1437 }
1438 
1439 static int ipgre_tap_init(struct net_device *dev)
1440 {
1441         struct ip_tunnel *tunnel;
1442 
1443         tunnel = netdev_priv(dev);
1444 
1445         tunnel->dev = dev;
1446         strcpy(tunnel->parms.name, dev->name);
1447 
1448         ipgre_tunnel_bind_dev(dev);
1449 
1450         return 0;
1451 }
1452 
1453 static const struct net_device_ops ipgre_tap_netdev_ops = {
1454         .ndo_init               = ipgre_tap_init,
1455         .ndo_uninit             = ipgre_tunnel_uninit,
1456         .ndo_start_xmit         = ipgre_tunnel_xmit,
1457         .ndo_set_mac_address    = eth_mac_addr,
1458         .ndo_validate_addr      = eth_validate_addr,
1459         .ndo_change_mtu         = ipgre_tunnel_change_mtu,
1460 };
1461 
1462 static void ipgre_tap_setup(struct net_device *dev)
1463 {
1464 
1465         ether_setup(dev);
1466 
1467         dev->netdev_ops         = &ipgre_tap_netdev_ops;
1468         dev->destructor         = free_netdev;
1469 
1470         dev->iflink             = 0;
1471         dev->features           |= NETIF_F_NETNS_LOCAL;
1472 }
1473 
1474 static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[],
1475                          struct nlattr *data[])
1476 {
1477         struct ip_tunnel *nt;
1478         struct net *net = dev_net(dev);
1479         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1480         int mtu;
1481         int err;
1482 
1483         nt = netdev_priv(dev);
1484         ipgre_netlink_parms(data, &nt->parms);
1485 
1486         if (ipgre_tunnel_find(net, &nt->parms, dev->type))
1487                 return -EEXIST;
1488 
1489         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
1490                 random_ether_addr(dev->dev_addr);
1491 
1492         mtu = ipgre_tunnel_bind_dev(dev);
1493         if (!tb[IFLA_MTU])
1494                 dev->mtu = mtu;
1495 
1496         err = register_netdevice(dev);
1497         if (err)
1498                 goto out;
1499 
1500         dev_hold(dev);
1501         ipgre_tunnel_link(ign, nt);
1502 
1503 out:
1504         return err;
1505 }
1506 
1507 static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
1508                             struct nlattr *data[])
1509 {
1510         struct ip_tunnel *t, *nt;
1511         struct net *net = dev_net(dev);
1512         struct ipgre_net *ign = net_generic(net, ipgre_net_id);
1513         struct ip_tunnel_parm p;
1514         int mtu;
1515 
1516         if (dev == ign->fb_tunnel_dev)
1517                 return -EINVAL;
1518 
1519         nt = netdev_priv(dev);
1520         ipgre_netlink_parms(data, &p);
1521 
1522         t = ipgre_tunnel_locate(net, &p, 0);
1523 
1524         if (t) {
1525                 if (t->dev != dev)
1526                         return -EEXIST;
1527         } else {
1528                 t = nt;
1529 
1530                 if (dev->type != ARPHRD_ETHER) {
1531                         unsigned nflags = 0;
1532 
1533                         if (ipv4_is_multicast(p.iph.daddr))
1534                                 nflags = IFF_BROADCAST;
1535                         else if (p.iph.daddr)
1536                                 nflags = IFF_POINTOPOINT;
1537 
1538                         if ((dev->flags ^ nflags) &
1539                             (IFF_POINTOPOINT | IFF_BROADCAST))
1540                                 return -EINVAL;
1541                 }
1542 
1543                 ipgre_tunnel_unlink(ign, t);
1544                 t->parms.iph.saddr = p.iph.saddr;
1545                 t->parms.iph.daddr = p.iph.daddr;
1546                 t->parms.i_key = p.i_key;
1547                 if (dev->type != ARPHRD_ETHER) {
1548                         memcpy(dev->dev_addr, &p.iph.saddr, 4);
1549                         memcpy(dev->broadcast, &p.iph.daddr, 4);
1550                 }
1551                 ipgre_tunnel_link(ign, t);
1552                 netdev_state_change(dev);
1553         }
1554 
1555         t->parms.o_key = p.o_key;
1556         t->parms.iph.ttl = p.iph.ttl;
1557         t->parms.iph.tos = p.iph.tos;
1558         t->parms.iph.frag_off = p.iph.frag_off;
1559 
1560         if (t->parms.link != p.link) {
1561                 t->parms.link = p.link;
1562                 mtu = ipgre_tunnel_bind_dev(dev);
1563                 if (!tb[IFLA_MTU])
1564                         dev->mtu = mtu;
1565                 netdev_state_change(dev);
1566         }
1567 
1568         return 0;
1569 }
1570 
1571 static size_t ipgre_get_size(const struct net_device *dev)
1572 {
1573         return
1574                 /* IFLA_GRE_LINK */
1575                 nla_total_size(4) +
1576                 /* IFLA_GRE_IFLAGS */
1577                 nla_total_size(2) +
1578                 /* IFLA_GRE_OFLAGS */
1579                 nla_total_size(2) +
1580                 /* IFLA_GRE_IKEY */
1581                 nla_total_size(4) +
1582                 /* IFLA_GRE_OKEY */
1583                 nla_total_size(4) +
1584                 /* IFLA_GRE_LOCAL */
1585                 nla_total_size(4) +
1586                 /* IFLA_GRE_REMOTE */
1587                 nla_total_size(4) +
1588                 /* IFLA_GRE_TTL */
1589                 nla_total_size(1) +
1590                 /* IFLA_GRE_TOS */
1591                 nla_total_size(1) +
1592                 /* IFLA_GRE_PMTUDISC */
1593                 nla_total_size(1) +
1594                 0;
1595 }
1596 
1597 static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
1598 {
1599         struct ip_tunnel *t = netdev_priv(dev);
1600         struct ip_tunnel_parm *p = &t->parms;
1601 
1602         NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link);
1603         NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags);
1604         NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags);
1605         NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key);
1606         NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key);
1607         NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr);
1608         NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr);
1609         NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl);
1610         NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos);
1611         NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF)));
1612 
1613         return 0;
1614 
1615 nla_put_failure:
1616         return -EMSGSIZE;
1617 }
1618 
1619 static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
1620         [IFLA_GRE_LINK]         = { .type = NLA_U32 },
1621         [IFLA_GRE_IFLAGS]       = { .type = NLA_U16 },
1622         [IFLA_GRE_OFLAGS]       = { .type = NLA_U16 },
1623         [IFLA_GRE_IKEY]         = { .type = NLA_U32 },
1624         [IFLA_GRE_OKEY]         = { .type = NLA_U32 },
1625         [IFLA_GRE_LOCAL]        = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
1626         [IFLA_GRE_REMOTE]       = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
1627         [IFLA_GRE_TTL]          = { .type = NLA_U8 },
1628         [IFLA_GRE_TOS]          = { .type = NLA_U8 },
1629         [IFLA_GRE_PMTUDISC]     = { .type = NLA_U8 },
1630 };
1631 
1632 static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
1633         .kind           = "gre",
1634         .maxtype        = IFLA_GRE_MAX,
1635         .policy         = ipgre_policy,
1636         .priv_size      = sizeof(struct ip_tunnel),
1637         .setup          = ipgre_tunnel_setup,
1638         .validate       = ipgre_tunnel_validate,
1639         .newlink        = ipgre_newlink,
1640         .changelink     = ipgre_changelink,
1641         .get_size       = ipgre_get_size,
1642         .fill_info      = ipgre_fill_info,
1643 };
1644 
1645 static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
1646         .kind           = "gretap",
1647         .maxtype        = IFLA_GRE_MAX,
1648         .policy         = ipgre_policy,
1649         .priv_size      = sizeof(struct ip_tunnel),
1650         .setup          = ipgre_tap_setup,
1651         .validate       = ipgre_tap_validate,
1652         .newlink        = ipgre_newlink,
1653         .changelink     = ipgre_changelink,
1654         .get_size       = ipgre_get_size,
1655         .fill_info      = ipgre_fill_info,
1656 };
1657 
1658 /*
1659  *      And now the modules code and kernel interface.
1660  */
1661 
1662 static int __init ipgre_init(void)
1663 {
1664         int err;
1665 
1666         printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
1667 
1668         err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops);
1669         if (err < 0)
1670                 return err;
1671 
1672         err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE);
1673         if (err < 0) {
1674                 printk(KERN_INFO "ipgre init: can't add protocol\n");
1675                 goto add_proto_failed;
1676         }
1677 
1678         err = rtnl_link_register(&ipgre_link_ops);
1679         if (err < 0)
1680                 goto rtnl_link_failed;
1681 
1682         err = rtnl_link_register(&ipgre_tap_ops);
1683         if (err < 0)
1684                 goto tap_ops_failed;
1685 
1686 out:
1687         return err;
1688 
1689 tap_ops_failed:
1690         rtnl_link_unregister(&ipgre_link_ops);
1691 rtnl_link_failed:
1692         inet_del_protocol(&ipgre_protocol, IPPROTO_GRE);
1693 add_proto_failed:
1694         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1695         goto out;
1696 }
1697 
1698 static void __exit ipgre_fini(void)
1699 {
1700         rtnl_link_unregister(&ipgre_tap_ops);
1701         rtnl_link_unregister(&ipgre_link_ops);
1702         if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
1703                 printk(KERN_INFO "ipgre close: can't remove protocol\n");
1704         unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
1705 }
1706 
1707 module_init(ipgre_init);
1708 module_exit(ipgre_fini);
1709 MODULE_LICENSE("GPL");
1710 MODULE_ALIAS_RTNL_LINK("gre");
1711 MODULE_ALIAS_RTNL_LINK("gretap");
1712 MODULE_ALIAS_NETDEV("gre0");
1713 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp