~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/mpls/af_mpls.c

Version: ~ [ linux-5.2-rc1 ] ~ [ linux-5.1.2 ] ~ [ linux-5.0.16 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.43 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.119 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.176 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.179 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.139 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.67 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.39.4 ] ~ [ linux-2.6.38.8 ] ~ [ linux-2.6.37.6 ] ~ [ linux-2.6.36.4 ] ~ [ linux-2.6.35.14 ] ~ [ linux-2.6.34.15 ] ~ [ linux-2.6.33.20 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 #include <linux/types.h>
  2 #include <linux/skbuff.h>
  3 #include <linux/socket.h>
  4 #include <linux/sysctl.h>
  5 #include <linux/net.h>
  6 #include <linux/module.h>
  7 #include <linux/if_arp.h>
  8 #include <linux/ipv6.h>
  9 #include <linux/mpls.h>
 10 #include <linux/vmalloc.h>
 11 #include <net/ip.h>
 12 #include <net/dst.h>
 13 #include <net/sock.h>
 14 #include <net/arp.h>
 15 #include <net/ip_fib.h>
 16 #include <net/netevent.h>
 17 #include <net/netns/generic.h>
 18 #if IS_ENABLED(CONFIG_IPV6)
 19 #include <net/ipv6.h>
 20 #include <net/addrconf.h>
 21 #endif
 22 #include "internal.h"
 23 
 24 #define LABEL_NOT_SPECIFIED (1<<20)
 25 #define MAX_NEW_LABELS 2
 26 
 27 /* This maximum ha length copied from the definition of struct neighbour */
 28 #define MAX_VIA_ALEN (ALIGN(MAX_ADDR_LEN, sizeof(unsigned long)))
 29 
 30 enum mpls_payload_type {
 31         MPT_UNSPEC, /* IPv4 or IPv6 */
 32         MPT_IPV4 = 4,
 33         MPT_IPV6 = 6,
 34 
 35         /* Other types not implemented:
 36          *  - Pseudo-wire with or without control word (RFC4385)
 37          *  - GAL (RFC5586)
 38          */
 39 };
 40 
 41 struct mpls_route { /* next hop label forwarding entry */
 42         struct net_device __rcu *rt_dev;
 43         struct rcu_head         rt_rcu;
 44         u32                     rt_label[MAX_NEW_LABELS];
 45         u8                      rt_protocol; /* routing protocol that set this entry */
 46         u8                      rt_payload_type;
 47         u8                      rt_labels;
 48         u8                      rt_via_alen;
 49         u8                      rt_via_table;
 50         u8                      rt_via[0];
 51 };
 52 
 53 static int zero = 0;
 54 static int label_limit = (1 << 20) - 1;
 55 
 56 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
 57                        struct nlmsghdr *nlh, struct net *net, u32 portid,
 58                        unsigned int nlm_flags);
 59 
 60 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 61 {
 62         struct mpls_route *rt = NULL;
 63 
 64         if (index < net->mpls.platform_labels) {
 65                 struct mpls_route __rcu **platform_label =
 66                         rcu_dereference(net->mpls.platform_label);
 67                 rt = rcu_dereference(platform_label[index]);
 68         }
 69         return rt;
 70 }
 71 
 72 static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev)
 73 {
 74         return rcu_dereference_rtnl(dev->mpls_ptr);
 75 }
 76 
 77 bool mpls_output_possible(const struct net_device *dev)
 78 {
 79         return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev);
 80 }
 81 EXPORT_SYMBOL_GPL(mpls_output_possible);
 82 
 83 static unsigned int mpls_rt_header_size(const struct mpls_route *rt)
 84 {
 85         /* The size of the layer 2.5 labels to be added for this route */
 86         return rt->rt_labels * sizeof(struct mpls_shim_hdr);
 87 }
 88 
 89 unsigned int mpls_dev_mtu(const struct net_device *dev)
 90 {
 91         /* The amount of data the layer 2 frame can hold */
 92         return dev->mtu;
 93 }
 94 EXPORT_SYMBOL_GPL(mpls_dev_mtu);
 95 
 96 bool mpls_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
 97 {
 98         if (skb->len <= mtu)
 99                 return false;
100 
101         if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
102                 return false;
103 
104         return true;
105 }
106 EXPORT_SYMBOL_GPL(mpls_pkt_too_big);
107 
108 static bool mpls_egress(struct mpls_route *rt, struct sk_buff *skb,
109                         struct mpls_entry_decoded dec)
110 {
111         enum mpls_payload_type payload_type;
112         bool success = false;
113 
114         /* The IPv4 code below accesses through the IPv4 header
115          * checksum, which is 12 bytes into the packet.
116          * The IPv6 code below accesses through the IPv6 hop limit
117          * which is 8 bytes into the packet.
118          *
119          * For all supported cases there should always be at least 12
120          * bytes of packet data present.  The IPv4 header is 20 bytes
121          * without options and the IPv6 header is always 40 bytes
122          * long.
123          */
124         if (!pskb_may_pull(skb, 12))
125                 return false;
126 
127         payload_type = rt->rt_payload_type;
128         if (payload_type == MPT_UNSPEC)
129                 payload_type = ip_hdr(skb)->version;
130 
131         switch (payload_type) {
132         case MPT_IPV4: {
133                 struct iphdr *hdr4 = ip_hdr(skb);
134                 skb->protocol = htons(ETH_P_IP);
135                 csum_replace2(&hdr4->check,
136                               htons(hdr4->ttl << 8),
137                               htons(dec.ttl << 8));
138                 hdr4->ttl = dec.ttl;
139                 success = true;
140                 break;
141         }
142         case MPT_IPV6: {
143                 struct ipv6hdr *hdr6 = ipv6_hdr(skb);
144                 skb->protocol = htons(ETH_P_IPV6);
145                 hdr6->hop_limit = dec.ttl;
146                 success = true;
147                 break;
148         }
149         case MPT_UNSPEC:
150                 break;
151         }
152 
153         return success;
154 }
155 
156 static int mpls_forward(struct sk_buff *skb, struct net_device *dev,
157                         struct packet_type *pt, struct net_device *orig_dev)
158 {
159         struct net *net = dev_net(dev);
160         struct mpls_shim_hdr *hdr;
161         struct mpls_route *rt;
162         struct mpls_entry_decoded dec;
163         struct net_device *out_dev;
164         struct mpls_dev *mdev;
165         unsigned int hh_len;
166         unsigned int new_header_size;
167         unsigned int mtu;
168         int err;
169 
170         /* Careful this entire function runs inside of an rcu critical section */
171 
172         mdev = mpls_dev_get(dev);
173         if (!mdev || !mdev->input_enabled)
174                 goto drop;
175 
176         if (skb->pkt_type != PACKET_HOST)
177                 goto drop;
178 
179         if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
180                 goto drop;
181 
182         if (!pskb_may_pull(skb, sizeof(*hdr)))
183                 goto drop;
184 
185         /* Read and decode the label */
186         hdr = mpls_hdr(skb);
187         dec = mpls_entry_decode(hdr);
188 
189         /* Pop the label */
190         skb_pull(skb, sizeof(*hdr));
191         skb_reset_network_header(skb);
192 
193         skb_orphan(skb);
194 
195         rt = mpls_route_input_rcu(net, dec.label);
196         if (!rt)
197                 goto drop;
198 
199         /* Find the output device */
200         out_dev = rcu_dereference(rt->rt_dev);
201         if (!mpls_output_possible(out_dev))
202                 goto drop;
203 
204         if (skb_warn_if_lro(skb))
205                 goto drop;
206 
207         skb_forward_csum(skb);
208 
209         /* Verify ttl is valid */
210         if (dec.ttl <= 1)
211                 goto drop;
212         dec.ttl -= 1;
213 
214         /* Verify the destination can hold the packet */
215         new_header_size = mpls_rt_header_size(rt);
216         mtu = mpls_dev_mtu(out_dev);
217         if (mpls_pkt_too_big(skb, mtu - new_header_size))
218                 goto drop;
219 
220         hh_len = LL_RESERVED_SPACE(out_dev);
221         if (!out_dev->header_ops)
222                 hh_len = 0;
223 
224         /* Ensure there is enough space for the headers in the skb */
225         if (skb_cow(skb, hh_len + new_header_size))
226                 goto drop;
227 
228         skb->dev = out_dev;
229         skb->protocol = htons(ETH_P_MPLS_UC);
230 
231         if (unlikely(!new_header_size && dec.bos)) {
232                 /* Penultimate hop popping */
233                 if (!mpls_egress(rt, skb, dec))
234                         goto drop;
235         } else {
236                 bool bos;
237                 int i;
238                 skb_push(skb, new_header_size);
239                 skb_reset_network_header(skb);
240                 /* Push the new labels */
241                 hdr = mpls_hdr(skb);
242                 bos = dec.bos;
243                 for (i = rt->rt_labels - 1; i >= 0; i--) {
244                         hdr[i] = mpls_entry_encode(rt->rt_label[i], dec.ttl, 0, bos);
245                         bos = false;
246                 }
247         }
248 
249         err = neigh_xmit(rt->rt_via_table, out_dev, rt->rt_via, skb);
250         if (err)
251                 net_dbg_ratelimited("%s: packet transmission failed: %d\n",
252                                     __func__, err);
253         return 0;
254 
255 drop:
256         kfree_skb(skb);
257         return NET_RX_DROP;
258 }
259 
260 static struct packet_type mpls_packet_type __read_mostly = {
261         .type = cpu_to_be16(ETH_P_MPLS_UC),
262         .func = mpls_forward,
263 };
264 
265 static const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
266         [RTA_DST]               = { .type = NLA_U32 },
267         [RTA_OIF]               = { .type = NLA_U32 },
268 };
269 
270 struct mpls_route_config {
271         u32                     rc_protocol;
272         u32                     rc_ifindex;
273         u16                     rc_via_table;
274         u16                     rc_via_alen;
275         u8                      rc_via[MAX_VIA_ALEN];
276         u32                     rc_label;
277         u32                     rc_output_labels;
278         u32                     rc_output_label[MAX_NEW_LABELS];
279         u32                     rc_nlflags;
280         enum mpls_payload_type  rc_payload_type;
281         struct nl_info          rc_nlinfo;
282 };
283 
284 static struct mpls_route *mpls_rt_alloc(size_t alen)
285 {
286         struct mpls_route *rt;
287 
288         rt = kzalloc(sizeof(*rt) + alen, GFP_KERNEL);
289         if (rt)
290                 rt->rt_via_alen = alen;
291         return rt;
292 }
293 
294 static void mpls_rt_free(struct mpls_route *rt)
295 {
296         if (rt)
297                 kfree_rcu(rt, rt_rcu);
298 }
299 
300 static void mpls_notify_route(struct net *net, unsigned index,
301                               struct mpls_route *old, struct mpls_route *new,
302                               const struct nl_info *info)
303 {
304         struct nlmsghdr *nlh = info ? info->nlh : NULL;
305         unsigned portid = info ? info->portid : 0;
306         int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
307         struct mpls_route *rt = new ? new : old;
308         unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
309         /* Ignore reserved labels for now */
310         if (rt && (index >= MPLS_LABEL_FIRST_UNRESERVED))
311                 rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
312 }
313 
314 static void mpls_route_update(struct net *net, unsigned index,
315                               struct net_device *dev, struct mpls_route *new,
316                               const struct nl_info *info)
317 {
318         struct mpls_route __rcu **platform_label;
319         struct mpls_route *rt, *old = NULL;
320 
321         ASSERT_RTNL();
322 
323         platform_label = rtnl_dereference(net->mpls.platform_label);
324         rt = rtnl_dereference(platform_label[index]);
325         if (!dev || (rt && (rtnl_dereference(rt->rt_dev) == dev))) {
326                 rcu_assign_pointer(platform_label[index], new);
327                 old = rt;
328         }
329 
330         mpls_notify_route(net, index, old, new, info);
331 
332         /* If we removed a route free it now */
333         mpls_rt_free(old);
334 }
335 
336 static unsigned find_free_label(struct net *net)
337 {
338         struct mpls_route __rcu **platform_label;
339         size_t platform_labels;
340         unsigned index;
341 
342         platform_label = rtnl_dereference(net->mpls.platform_label);
343         platform_labels = net->mpls.platform_labels;
344         for (index = MPLS_LABEL_FIRST_UNRESERVED; index < platform_labels;
345              index++) {
346                 if (!rtnl_dereference(platform_label[index]))
347                         return index;
348         }
349         return LABEL_NOT_SPECIFIED;
350 }
351 
352 #if IS_ENABLED(CONFIG_INET)
353 static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
354 {
355         struct net_device *dev;
356         struct rtable *rt;
357         struct in_addr daddr;
358 
359         memcpy(&daddr, addr, sizeof(struct in_addr));
360         rt = ip_route_output(net, daddr.s_addr, 0, 0, 0);
361         if (IS_ERR(rt))
362                 return ERR_CAST(rt);
363 
364         dev = rt->dst.dev;
365         dev_hold(dev);
366 
367         ip_rt_put(rt);
368 
369         return dev;
370 }
371 #else
372 static struct net_device *inet_fib_lookup_dev(struct net *net, void *addr)
373 {
374         return ERR_PTR(-EAFNOSUPPORT);
375 }
376 #endif
377 
378 #if IS_ENABLED(CONFIG_IPV6)
379 static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
380 {
381         struct net_device *dev;
382         struct dst_entry *dst;
383         struct flowi6 fl6;
384         int err;
385 
386         if (!ipv6_stub)
387                 return ERR_PTR(-EAFNOSUPPORT);
388 
389         memset(&fl6, 0, sizeof(fl6));
390         memcpy(&fl6.daddr, addr, sizeof(struct in6_addr));
391         err = ipv6_stub->ipv6_dst_lookup(net, NULL, &dst, &fl6);
392         if (err)
393                 return ERR_PTR(err);
394 
395         dev = dst->dev;
396         dev_hold(dev);
397         dst_release(dst);
398 
399         return dev;
400 }
401 #else
402 static struct net_device *inet6_fib_lookup_dev(struct net *net, void *addr)
403 {
404         return ERR_PTR(-EAFNOSUPPORT);
405 }
406 #endif
407 
408 static struct net_device *find_outdev(struct net *net,
409                                       struct mpls_route_config *cfg)
410 {
411         struct net_device *dev = NULL;
412 
413         if (!cfg->rc_ifindex) {
414                 switch (cfg->rc_via_table) {
415                 case NEIGH_ARP_TABLE:
416                         dev = inet_fib_lookup_dev(net, cfg->rc_via);
417                         break;
418                 case NEIGH_ND_TABLE:
419                         dev = inet6_fib_lookup_dev(net, cfg->rc_via);
420                         break;
421                 case NEIGH_LINK_TABLE:
422                         break;
423                 }
424         } else {
425                 dev = dev_get_by_index(net, cfg->rc_ifindex);
426         }
427 
428         if (!dev)
429                 return ERR_PTR(-ENODEV);
430 
431         return dev;
432 }
433 
434 static int mpls_route_add(struct mpls_route_config *cfg)
435 {
436         struct mpls_route __rcu **platform_label;
437         struct net *net = cfg->rc_nlinfo.nl_net;
438         struct net_device *dev = NULL;
439         struct mpls_route *rt, *old;
440         unsigned index;
441         int i;
442         int err = -EINVAL;
443 
444         index = cfg->rc_label;
445 
446         /* If a label was not specified during insert pick one */
447         if ((index == LABEL_NOT_SPECIFIED) &&
448             (cfg->rc_nlflags & NLM_F_CREATE)) {
449                 index = find_free_label(net);
450         }
451 
452         /* Reserved labels may not be set */
453         if (index < MPLS_LABEL_FIRST_UNRESERVED)
454                 goto errout;
455 
456         /* The full 20 bit range may not be supported. */
457         if (index >= net->mpls.platform_labels)
458                 goto errout;
459 
460         /* Ensure only a supported number of labels are present */
461         if (cfg->rc_output_labels > MAX_NEW_LABELS)
462                 goto errout;
463 
464         dev = find_outdev(net, cfg);
465         if (IS_ERR(dev)) {
466                 err = PTR_ERR(dev);
467                 dev = NULL;
468                 goto errout;
469         }
470 
471         /* Ensure this is a supported device */
472         err = -EINVAL;
473         if (!mpls_dev_get(dev))
474                 goto errout;
475 
476         err = -EINVAL;
477         if ((cfg->rc_via_table == NEIGH_LINK_TABLE) &&
478             (dev->addr_len != cfg->rc_via_alen))
479                 goto errout;
480 
481         /* Append makes no sense with mpls */
482         err = -EOPNOTSUPP;
483         if (cfg->rc_nlflags & NLM_F_APPEND)
484                 goto errout;
485 
486         err = -EEXIST;
487         platform_label = rtnl_dereference(net->mpls.platform_label);
488         old = rtnl_dereference(platform_label[index]);
489         if ((cfg->rc_nlflags & NLM_F_EXCL) && old)
490                 goto errout;
491 
492         err = -EEXIST;
493         if (!(cfg->rc_nlflags & NLM_F_REPLACE) && old)
494                 goto errout;
495 
496         err = -ENOENT;
497         if (!(cfg->rc_nlflags & NLM_F_CREATE) && !old)
498                 goto errout;
499 
500         err = -ENOMEM;
501         rt = mpls_rt_alloc(cfg->rc_via_alen);
502         if (!rt)
503                 goto errout;
504 
505         rt->rt_labels = cfg->rc_output_labels;
506         for (i = 0; i < rt->rt_labels; i++)
507                 rt->rt_label[i] = cfg->rc_output_label[i];
508         rt->rt_protocol = cfg->rc_protocol;
509         RCU_INIT_POINTER(rt->rt_dev, dev);
510         rt->rt_payload_type = cfg->rc_payload_type;
511         rt->rt_via_table = cfg->rc_via_table;
512         memcpy(rt->rt_via, cfg->rc_via, cfg->rc_via_alen);
513 
514         mpls_route_update(net, index, NULL, rt, &cfg->rc_nlinfo);
515 
516         dev_put(dev);
517         return 0;
518 
519 errout:
520         if (dev)
521                 dev_put(dev);
522         return err;
523 }
524 
525 static int mpls_route_del(struct mpls_route_config *cfg)
526 {
527         struct net *net = cfg->rc_nlinfo.nl_net;
528         unsigned index;
529         int err = -EINVAL;
530 
531         index = cfg->rc_label;
532 
533         /* Reserved labels may not be removed */
534         if (index < MPLS_LABEL_FIRST_UNRESERVED)
535                 goto errout;
536 
537         /* The full 20 bit range may not be supported */
538         if (index >= net->mpls.platform_labels)
539                 goto errout;
540 
541         mpls_route_update(net, index, NULL, NULL, &cfg->rc_nlinfo);
542 
543         err = 0;
544 errout:
545         return err;
546 }
547 
548 #define MPLS_PERDEV_SYSCTL_OFFSET(field)        \
549         (&((struct mpls_dev *)0)->field)
550 
551 static const struct ctl_table mpls_dev_table[] = {
552         {
553                 .procname       = "input",
554                 .maxlen         = sizeof(int),
555                 .mode           = 0644,
556                 .proc_handler   = proc_dointvec,
557                 .data           = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled),
558         },
559         { }
560 };
561 
562 static int mpls_dev_sysctl_register(struct net_device *dev,
563                                     struct mpls_dev *mdev)
564 {
565         char path[sizeof("net/mpls/conf/") + IFNAMSIZ];
566         struct ctl_table *table;
567         int i;
568 
569         table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL);
570         if (!table)
571                 goto out;
572 
573         /* Table data contains only offsets relative to the base of
574          * the mdev at this point, so make them absolute.
575          */
576         for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++)
577                 table[i].data = (char *)mdev + (uintptr_t)table[i].data;
578 
579         snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name);
580 
581         mdev->sysctl = register_net_sysctl(dev_net(dev), path, table);
582         if (!mdev->sysctl)
583                 goto free;
584 
585         return 0;
586 
587 free:
588         kfree(table);
589 out:
590         return -ENOBUFS;
591 }
592 
593 static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev)
594 {
595         struct ctl_table *table;
596 
597         table = mdev->sysctl->ctl_table_arg;
598         unregister_net_sysctl_table(mdev->sysctl);
599         kfree(table);
600 }
601 
602 static struct mpls_dev *mpls_add_dev(struct net_device *dev)
603 {
604         struct mpls_dev *mdev;
605         int err = -ENOMEM;
606 
607         ASSERT_RTNL();
608 
609         mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
610         if (!mdev)
611                 return ERR_PTR(err);
612 
613         err = mpls_dev_sysctl_register(dev, mdev);
614         if (err)
615                 goto free;
616 
617         rcu_assign_pointer(dev->mpls_ptr, mdev);
618 
619         return mdev;
620 
621 free:
622         kfree(mdev);
623         return ERR_PTR(err);
624 }
625 
626 static void mpls_ifdown(struct net_device *dev)
627 {
628         struct mpls_route __rcu **platform_label;
629         struct net *net = dev_net(dev);
630         struct mpls_dev *mdev;
631         unsigned index;
632 
633         platform_label = rtnl_dereference(net->mpls.platform_label);
634         for (index = 0; index < net->mpls.platform_labels; index++) {
635                 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
636                 if (!rt)
637                         continue;
638                 if (rtnl_dereference(rt->rt_dev) != dev)
639                         continue;
640                 rt->rt_dev = NULL;
641         }
642 
643         mdev = mpls_dev_get(dev);
644         if (!mdev)
645                 return;
646 
647         mpls_dev_sysctl_unregister(mdev);
648 
649         RCU_INIT_POINTER(dev->mpls_ptr, NULL);
650 
651         kfree_rcu(mdev, rcu);
652 }
653 
654 static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
655                            void *ptr)
656 {
657         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
658         struct mpls_dev *mdev;
659 
660         switch(event) {
661         case NETDEV_REGISTER:
662                 /* For now just support ethernet devices */
663                 if ((dev->type == ARPHRD_ETHER) ||
664                     (dev->type == ARPHRD_LOOPBACK)) {
665                         mdev = mpls_add_dev(dev);
666                         if (IS_ERR(mdev))
667                                 return notifier_from_errno(PTR_ERR(mdev));
668                 }
669                 break;
670 
671         case NETDEV_UNREGISTER:
672                 mpls_ifdown(dev);
673                 break;
674         case NETDEV_CHANGENAME:
675                 mdev = mpls_dev_get(dev);
676                 if (mdev) {
677                         int err;
678 
679                         mpls_dev_sysctl_unregister(mdev);
680                         err = mpls_dev_sysctl_register(dev, mdev);
681                         if (err)
682                                 return notifier_from_errno(err);
683                 }
684                 break;
685         }
686         return NOTIFY_OK;
687 }
688 
689 static struct notifier_block mpls_dev_notifier = {
690         .notifier_call = mpls_dev_notify,
691 };
692 
693 static int nla_put_via(struct sk_buff *skb,
694                        u8 table, const void *addr, int alen)
695 {
696         static const int table_to_family[NEIGH_NR_TABLES + 1] = {
697                 AF_INET, AF_INET6, AF_DECnet, AF_PACKET,
698         };
699         struct nlattr *nla;
700         struct rtvia *via;
701         int family = AF_UNSPEC;
702 
703         nla = nla_reserve(skb, RTA_VIA, alen + 2);
704         if (!nla)
705                 return -EMSGSIZE;
706 
707         if (table <= NEIGH_NR_TABLES)
708                 family = table_to_family[table];
709 
710         via = nla_data(nla);
711         via->rtvia_family = family;
712         memcpy(via->rtvia_addr, addr, alen);
713         return 0;
714 }
715 
716 int nla_put_labels(struct sk_buff *skb, int attrtype,
717                    u8 labels, const u32 label[])
718 {
719         struct nlattr *nla;
720         struct mpls_shim_hdr *nla_label;
721         bool bos;
722         int i;
723         nla = nla_reserve(skb, attrtype, labels*4);
724         if (!nla)
725                 return -EMSGSIZE;
726 
727         nla_label = nla_data(nla);
728         bos = true;
729         for (i = labels - 1; i >= 0; i--) {
730                 nla_label[i] = mpls_entry_encode(label[i], 0, 0, bos);
731                 bos = false;
732         }
733 
734         return 0;
735 }
736 EXPORT_SYMBOL_GPL(nla_put_labels);
737 
738 int nla_get_labels(const struct nlattr *nla,
739                    u32 max_labels, u32 *labels, u32 label[])
740 {
741         unsigned len = nla_len(nla);
742         unsigned nla_labels;
743         struct mpls_shim_hdr *nla_label;
744         bool bos;
745         int i;
746 
747         /* len needs to be an even multiple of 4 (the label size) */
748         if (len & 3)
749                 return -EINVAL;
750 
751         /* Limit the number of new labels allowed */
752         nla_labels = len/4;
753         if (nla_labels > max_labels)
754                 return -EINVAL;
755 
756         nla_label = nla_data(nla);
757         bos = true;
758         for (i = nla_labels - 1; i >= 0; i--, bos = false) {
759                 struct mpls_entry_decoded dec;
760                 dec = mpls_entry_decode(nla_label + i);
761 
762                 /* Ensure the bottom of stack flag is properly set
763                  * and ttl and tc are both clear.
764                  */
765                 if ((dec.bos != bos) || dec.ttl || dec.tc)
766                         return -EINVAL;
767 
768                 switch (dec.label) {
769                 case MPLS_LABEL_IMPLNULL:
770                         /* RFC3032: This is a label that an LSR may
771                          * assign and distribute, but which never
772                          * actually appears in the encapsulation.
773                          */
774                         return -EINVAL;
775                 }
776 
777                 label[i] = dec.label;
778         }
779         *labels = nla_labels;
780         return 0;
781 }
782 EXPORT_SYMBOL_GPL(nla_get_labels);
783 
784 static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
785                                struct mpls_route_config *cfg)
786 {
787         struct rtmsg *rtm;
788         struct nlattr *tb[RTA_MAX+1];
789         int index;
790         int err;
791 
792         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
793         if (err < 0)
794                 goto errout;
795 
796         err = -EINVAL;
797         rtm = nlmsg_data(nlh);
798         memset(cfg, 0, sizeof(*cfg));
799 
800         if (rtm->rtm_family != AF_MPLS)
801                 goto errout;
802         if (rtm->rtm_dst_len != 20)
803                 goto errout;
804         if (rtm->rtm_src_len != 0)
805                 goto errout;
806         if (rtm->rtm_tos != 0)
807                 goto errout;
808         if (rtm->rtm_table != RT_TABLE_MAIN)
809                 goto errout;
810         /* Any value is acceptable for rtm_protocol */
811 
812         /* As mpls uses destination specific addresses
813          * (or source specific address in the case of multicast)
814          * all addresses have universal scope.
815          */
816         if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
817                 goto errout;
818         if (rtm->rtm_type != RTN_UNICAST)
819                 goto errout;
820         if (rtm->rtm_flags != 0)
821                 goto errout;
822 
823         cfg->rc_label           = LABEL_NOT_SPECIFIED;
824         cfg->rc_protocol        = rtm->rtm_protocol;
825         cfg->rc_nlflags         = nlh->nlmsg_flags;
826         cfg->rc_nlinfo.portid   = NETLINK_CB(skb).portid;
827         cfg->rc_nlinfo.nlh      = nlh;
828         cfg->rc_nlinfo.nl_net   = sock_net(skb->sk);
829 
830         for (index = 0; index <= RTA_MAX; index++) {
831                 struct nlattr *nla = tb[index];
832                 if (!nla)
833                         continue;
834 
835                 switch(index) {
836                 case RTA_OIF:
837                         cfg->rc_ifindex = nla_get_u32(nla);
838                         break;
839                 case RTA_NEWDST:
840                         if (nla_get_labels(nla, MAX_NEW_LABELS,
841                                            &cfg->rc_output_labels,
842                                            cfg->rc_output_label))
843                                 goto errout;
844                         break;
845                 case RTA_DST:
846                 {
847                         u32 label_count;
848                         if (nla_get_labels(nla, 1, &label_count,
849                                            &cfg->rc_label))
850                                 goto errout;
851 
852                         /* Reserved labels may not be set */
853                         if (cfg->rc_label < MPLS_LABEL_FIRST_UNRESERVED)
854                                 goto errout;
855 
856                         break;
857                 }
858                 case RTA_VIA:
859                 {
860                         struct rtvia *via = nla_data(nla);
861                         if (nla_len(nla) < offsetof(struct rtvia, rtvia_addr))
862                                 goto errout;
863                         cfg->rc_via_alen   = nla_len(nla) -
864                                 offsetof(struct rtvia, rtvia_addr);
865                         if (cfg->rc_via_alen > MAX_VIA_ALEN)
866                                 goto errout;
867 
868                         /* Validate the address family */
869                         switch(via->rtvia_family) {
870                         case AF_PACKET:
871                                 cfg->rc_via_table = NEIGH_LINK_TABLE;
872                                 break;
873                         case AF_INET:
874                                 cfg->rc_via_table = NEIGH_ARP_TABLE;
875                                 if (cfg->rc_via_alen != 4)
876                                         goto errout;
877                                 break;
878                         case AF_INET6:
879                                 cfg->rc_via_table = NEIGH_ND_TABLE;
880                                 if (cfg->rc_via_alen != 16)
881                                         goto errout;
882                                 break;
883                         default:
884                                 /* Unsupported address family */
885                                 goto errout;
886                         }
887 
888                         memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
889                         break;
890                 }
891                 default:
892                         /* Unsupported attribute */
893                         goto errout;
894                 }
895         }
896 
897         err = 0;
898 errout:
899         return err;
900 }
901 
902 static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
903 {
904         struct mpls_route_config cfg;
905         int err;
906 
907         err = rtm_to_route_config(skb, nlh, &cfg);
908         if (err < 0)
909                 return err;
910 
911         return mpls_route_del(&cfg);
912 }
913 
914 
915 static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
916 {
917         struct mpls_route_config cfg;
918         int err;
919 
920         err = rtm_to_route_config(skb, nlh, &cfg);
921         if (err < 0)
922                 return err;
923 
924         return mpls_route_add(&cfg);
925 }
926 
927 static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
928                            u32 label, struct mpls_route *rt, int flags)
929 {
930         struct net_device *dev;
931         struct nlmsghdr *nlh;
932         struct rtmsg *rtm;
933 
934         nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
935         if (nlh == NULL)
936                 return -EMSGSIZE;
937 
938         rtm = nlmsg_data(nlh);
939         rtm->rtm_family = AF_MPLS;
940         rtm->rtm_dst_len = 20;
941         rtm->rtm_src_len = 0;
942         rtm->rtm_tos = 0;
943         rtm->rtm_table = RT_TABLE_MAIN;
944         rtm->rtm_protocol = rt->rt_protocol;
945         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
946         rtm->rtm_type = RTN_UNICAST;
947         rtm->rtm_flags = 0;
948 
949         if (rt->rt_labels &&
950             nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
951                 goto nla_put_failure;
952         if (nla_put_via(skb, rt->rt_via_table, rt->rt_via, rt->rt_via_alen))
953                 goto nla_put_failure;
954         dev = rtnl_dereference(rt->rt_dev);
955         if (dev && nla_put_u32(skb, RTA_OIF, dev->ifindex))
956                 goto nla_put_failure;
957         if (nla_put_labels(skb, RTA_DST, 1, &label))
958                 goto nla_put_failure;
959 
960         nlmsg_end(skb, nlh);
961         return 0;
962 
963 nla_put_failure:
964         nlmsg_cancel(skb, nlh);
965         return -EMSGSIZE;
966 }
967 
968 static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
969 {
970         struct net *net = sock_net(skb->sk);
971         struct mpls_route __rcu **platform_label;
972         size_t platform_labels;
973         unsigned int index;
974 
975         ASSERT_RTNL();
976 
977         index = cb->args[0];
978         if (index < MPLS_LABEL_FIRST_UNRESERVED)
979                 index = MPLS_LABEL_FIRST_UNRESERVED;
980 
981         platform_label = rtnl_dereference(net->mpls.platform_label);
982         platform_labels = net->mpls.platform_labels;
983         for (; index < platform_labels; index++) {
984                 struct mpls_route *rt;
985                 rt = rtnl_dereference(platform_label[index]);
986                 if (!rt)
987                         continue;
988 
989                 if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
990                                     cb->nlh->nlmsg_seq, RTM_NEWROUTE,
991                                     index, rt, NLM_F_MULTI) < 0)
992                         break;
993         }
994         cb->args[0] = index;
995 
996         return skb->len;
997 }
998 
999 static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
1000 {
1001         size_t payload =
1002                 NLMSG_ALIGN(sizeof(struct rtmsg))
1003                 + nla_total_size(2 + rt->rt_via_alen)   /* RTA_VIA */
1004                 + nla_total_size(4);                    /* RTA_DST */
1005         if (rt->rt_labels)                              /* RTA_NEWDST */
1006                 payload += nla_total_size(rt->rt_labels * 4);
1007         if (rt->rt_dev)                                 /* RTA_OIF */
1008                 payload += nla_total_size(4);
1009         return payload;
1010 }
1011 
1012 static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
1013                        struct nlmsghdr *nlh, struct net *net, u32 portid,
1014                        unsigned int nlm_flags)
1015 {
1016         struct sk_buff *skb;
1017         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1018         int err = -ENOBUFS;
1019 
1020         skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
1021         if (skb == NULL)
1022                 goto errout;
1023 
1024         err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
1025         if (err < 0) {
1026                 /* -EMSGSIZE implies BUG in lfib_nlmsg_size */
1027                 WARN_ON(err == -EMSGSIZE);
1028                 kfree_skb(skb);
1029                 goto errout;
1030         }
1031         rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
1032 
1033         return;
1034 errout:
1035         if (err < 0)
1036                 rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
1037 }
1038 
1039 static int resize_platform_label_table(struct net *net, size_t limit)
1040 {
1041         size_t size = sizeof(struct mpls_route *) * limit;
1042         size_t old_limit;
1043         size_t cp_size;
1044         struct mpls_route __rcu **labels = NULL, **old;
1045         struct mpls_route *rt0 = NULL, *rt2 = NULL;
1046         unsigned index;
1047 
1048         if (size) {
1049                 labels = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
1050                 if (!labels)
1051                         labels = vzalloc(size);
1052 
1053                 if (!labels)
1054                         goto nolabels;
1055         }
1056 
1057         /* In case the predefined labels need to be populated */
1058         if (limit > MPLS_LABEL_IPV4NULL) {
1059                 struct net_device *lo = net->loopback_dev;
1060                 rt0 = mpls_rt_alloc(lo->addr_len);
1061                 if (!rt0)
1062                         goto nort0;
1063                 RCU_INIT_POINTER(rt0->rt_dev, lo);
1064                 rt0->rt_protocol = RTPROT_KERNEL;
1065                 rt0->rt_payload_type = MPT_IPV4;
1066                 rt0->rt_via_table = NEIGH_LINK_TABLE;
1067                 memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
1068         }
1069         if (limit > MPLS_LABEL_IPV6NULL) {
1070                 struct net_device *lo = net->loopback_dev;
1071                 rt2 = mpls_rt_alloc(lo->addr_len);
1072                 if (!rt2)
1073                         goto nort2;
1074                 RCU_INIT_POINTER(rt2->rt_dev, lo);
1075                 rt2->rt_protocol = RTPROT_KERNEL;
1076                 rt2->rt_payload_type = MPT_IPV6;
1077                 rt2->rt_via_table = NEIGH_LINK_TABLE;
1078                 memcpy(rt2->rt_via, lo->dev_addr, lo->addr_len);
1079         }
1080 
1081         rtnl_lock();
1082         /* Remember the original table */
1083         old = rtnl_dereference(net->mpls.platform_label);
1084         old_limit = net->mpls.platform_labels;
1085 
1086         /* Free any labels beyond the new table */
1087         for (index = limit; index < old_limit; index++)
1088                 mpls_route_update(net, index, NULL, NULL, NULL);
1089 
1090         /* Copy over the old labels */
1091         cp_size = size;
1092         if (old_limit < limit)
1093                 cp_size = old_limit * sizeof(struct mpls_route *);
1094 
1095         memcpy(labels, old, cp_size);
1096 
1097         /* If needed set the predefined labels */
1098         if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
1099             (limit > MPLS_LABEL_IPV6NULL)) {
1100                 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
1101                 rt2 = NULL;
1102         }
1103 
1104         if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
1105             (limit > MPLS_LABEL_IPV4NULL)) {
1106                 RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
1107                 rt0 = NULL;
1108         }
1109 
1110         /* Update the global pointers */
1111         net->mpls.platform_labels = limit;
1112         rcu_assign_pointer(net->mpls.platform_label, labels);
1113 
1114         rtnl_unlock();
1115 
1116         mpls_rt_free(rt2);
1117         mpls_rt_free(rt0);
1118 
1119         if (old) {
1120                 synchronize_rcu();
1121                 kvfree(old);
1122         }
1123         return 0;
1124 
1125 nort2:
1126         mpls_rt_free(rt0);
1127 nort0:
1128         kvfree(labels);
1129 nolabels:
1130         return -ENOMEM;
1131 }
1132 
1133 static int mpls_platform_labels(struct ctl_table *table, int write,
1134                                 void __user *buffer, size_t *lenp, loff_t *ppos)
1135 {
1136         struct net *net = table->data;
1137         int platform_labels = net->mpls.platform_labels;
1138         int ret;
1139         struct ctl_table tmp = {
1140                 .procname       = table->procname,
1141                 .data           = &platform_labels,
1142                 .maxlen         = sizeof(int),
1143                 .mode           = table->mode,
1144                 .extra1         = &zero,
1145                 .extra2         = &label_limit,
1146         };
1147 
1148         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
1149 
1150         if (write && ret == 0)
1151                 ret = resize_platform_label_table(net, platform_labels);
1152 
1153         return ret;
1154 }
1155 
1156 static const struct ctl_table mpls_table[] = {
1157         {
1158                 .procname       = "platform_labels",
1159                 .data           = NULL,
1160                 .maxlen         = sizeof(int),
1161                 .mode           = 0644,
1162                 .proc_handler   = mpls_platform_labels,
1163         },
1164         { }
1165 };
1166 
1167 static int mpls_net_init(struct net *net)
1168 {
1169         struct ctl_table *table;
1170 
1171         net->mpls.platform_labels = 0;
1172         net->mpls.platform_label = NULL;
1173 
1174         table = kmemdup(mpls_table, sizeof(mpls_table), GFP_KERNEL);
1175         if (table == NULL)
1176                 return -ENOMEM;
1177 
1178         table[0].data = net;
1179         net->mpls.ctl = register_net_sysctl(net, "net/mpls", table);
1180         if (net->mpls.ctl == NULL) {
1181                 kfree(table);
1182                 return -ENOMEM;
1183         }
1184 
1185         return 0;
1186 }
1187 
1188 static void mpls_net_exit(struct net *net)
1189 {
1190         struct mpls_route __rcu **platform_label;
1191         size_t platform_labels;
1192         struct ctl_table *table;
1193         unsigned int index;
1194 
1195         table = net->mpls.ctl->ctl_table_arg;
1196         unregister_net_sysctl_table(net->mpls.ctl);
1197         kfree(table);
1198 
1199         /* An rcu grace period has passed since there was a device in
1200          * the network namespace (and thus the last in flight packet)
1201          * left this network namespace.  This is because
1202          * unregister_netdevice_many and netdev_run_todo has completed
1203          * for each network device that was in this network namespace.
1204          *
1205          * As such no additional rcu synchronization is necessary when
1206          * freeing the platform_label table.
1207          */
1208         rtnl_lock();
1209         platform_label = rtnl_dereference(net->mpls.platform_label);
1210         platform_labels = net->mpls.platform_labels;
1211         for (index = 0; index < platform_labels; index++) {
1212                 struct mpls_route *rt = rtnl_dereference(platform_label[index]);
1213                 RCU_INIT_POINTER(platform_label[index], NULL);
1214                 mpls_rt_free(rt);
1215         }
1216         rtnl_unlock();
1217 
1218         kvfree(platform_label);
1219 }
1220 
1221 static struct pernet_operations mpls_net_ops = {
1222         .init = mpls_net_init,
1223         .exit = mpls_net_exit,
1224 };
1225 
1226 static int __init mpls_init(void)
1227 {
1228         int err;
1229 
1230         BUILD_BUG_ON(sizeof(struct mpls_shim_hdr) != 4);
1231 
1232         err = register_pernet_subsys(&mpls_net_ops);
1233         if (err)
1234                 goto out;
1235 
1236         err = register_netdevice_notifier(&mpls_dev_notifier);
1237         if (err)
1238                 goto out_unregister_pernet;
1239 
1240         dev_add_pack(&mpls_packet_type);
1241 
1242         rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
1243         rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
1244         rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
1245         err = 0;
1246 out:
1247         return err;
1248 
1249 out_unregister_pernet:
1250         unregister_pernet_subsys(&mpls_net_ops);
1251         goto out;
1252 }
1253 module_init(mpls_init);
1254 
1255 static void __exit mpls_exit(void)
1256 {
1257         rtnl_unregister_all(PF_MPLS);
1258         dev_remove_pack(&mpls_packet_type);
1259         unregister_netdevice_notifier(&mpls_dev_notifier);
1260         unregister_pernet_subsys(&mpls_net_ops);
1261 }
1262 module_exit(mpls_exit);
1263 
1264 MODULE_DESCRIPTION("MultiProtocol Label Switching");
1265 MODULE_LICENSE("GPL v2");
1266 MODULE_ALIAS_NETPROTO(PF_MPLS);
1267 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp