~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/ipmr.c

Version: ~ [ linux-5.2 ] ~ [ linux-5.1.16 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.57 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.132 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.184 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.184 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.69 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.39.4 ] ~ [ linux-2.6.38.8 ] ~ [ linux-2.6.37.6 ] ~ [ linux-2.6.36.4 ] ~ [ linux-2.6.35.14 ] ~ [ linux-2.6.34.15 ] ~ [ linux-2.6.33.20 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      IP multicast routing support for mrouted 3.6/3.8
  3  *
  4  *              (c) 1995 Alan Cox, <alan@redhat.com>
  5  *        Linux Consultancy and Custom Driver Development
  6  *
  7  *      This program is free software; you can redistribute it and/or
  8  *      modify it under the terms of the GNU General Public License
  9  *      as published by the Free Software Foundation; either version
 10  *      2 of the License, or (at your option) any later version.
 11  *
 12  *      Version: $Id: ipmr.c,v 1.65 2001/10/31 21:55:54 davem Exp $
 13  *
 14  *      Fixes:
 15  *      Michael Chastain        :       Incorrect size of copying.
 16  *      Alan Cox                :       Added the cache manager code
 17  *      Alan Cox                :       Fixed the clone/copy bug and device race.
 18  *      Mike McLagan            :       Routing by source
 19  *      Malcolm Beattie         :       Buffer handling fixes.
 20  *      Alexey Kuznetsov        :       Double buffer free and other fixes.
 21  *      SVR Anand               :       Fixed several multicast bugs and problems.
 22  *      Alexey Kuznetsov        :       Status, optimisations and more.
 23  *      Brad Parker             :       Better behaviour on mrouted upcall
 24  *                                      overflow.
 25  *      Carlos Picoto           :       PIMv1 Support
 26  *      Pavlin Ivanov Radoslavov:       PIMv2 Registers must checksum only PIM header
 27  *                                      Relax this requirement to work with older peers.
 28  *
 29  */
 30 
 31 #include <linux/config.h>
 32 #include <asm/system.h>
 33 #include <asm/uaccess.h>
 34 #include <linux/types.h>
 35 #include <linux/sched.h>
 36 #include <linux/errno.h>
 37 #include <linux/timer.h>
 38 #include <linux/mm.h>
 39 #include <linux/kernel.h>
 40 #include <linux/fcntl.h>
 41 #include <linux/stat.h>
 42 #include <linux/socket.h>
 43 #include <linux/in.h>
 44 #include <linux/inet.h>
 45 #include <linux/netdevice.h>
 46 #include <linux/inetdevice.h>
 47 #include <linux/igmp.h>
 48 #include <linux/proc_fs.h>
 49 #include <linux/mroute.h>
 50 #include <linux/init.h>
 51 #include <net/ip.h>
 52 #include <net/protocol.h>
 53 #include <linux/skbuff.h>
 54 #include <net/sock.h>
 55 #include <net/icmp.h>
 56 #include <net/udp.h>
 57 #include <net/raw.h>
 58 #include <linux/notifier.h>
 59 #include <linux/if_arp.h>
 60 #include <linux/netfilter_ipv4.h>
 61 #include <net/ipip.h>
 62 #include <net/checksum.h>
 63 
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM 1
#endif

/* The (single) raw IGMP socket mrouted/pimd uses to control routing;
 * NULL while no daemon has issued MRT_INIT. */
static struct sock *mroute_socket;


/* Big lock, protecting vif table, mrt cache and mroute socket state.
   Note that the changes are semaphored via rtnl_lock.
 */

static rwlock_t mrt_lock = RW_LOCK_UNLOCKED;

/*
 *	Multicast router control variables
 */

static struct vif_device vif_table[MAXVIFS];		/* Devices		*/
static int maxvif;					/* Highest in-use VIF index + 1 */

#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL)

int mroute_do_assert;					/* Set in PIM assert	*/
int mroute_do_pim;					/* PIM mode enabled	*/

static struct mfc_cache *mfc_cache_array[MFC_LINES];	/* Forwarding cache	*/

static struct mfc_cache *mfc_unres_queue;		/* Queue of unresolved entries */
atomic_t cache_resolve_queue_len;			/* Size of unresolved	*/

/* Special spinlock for queue of unresolved entries */
static spinlock_t mfc_unres_lock = SPIN_LOCK_UNLOCKED;

/* We return to original Alan's scheme. Hash table of resolved
   entries is changed only in process context and protected
   with weak lock mrt_lock. Queue of unresolved entries is protected
   with strong spinlock mfc_unres_lock.

   In this case data path is free of exclusive locks at all.
 */

kmem_cache_t *mrt_cachep;				/* Slab cache for struct mfc_cache */

static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);

extern struct inet_protocol pim_protocol;

/* Single timer servicing the whole unresolved queue (ipmr_expire_process). */
static struct timer_list ipmr_expire_timer;
115 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
116 
/* Create a DVMRP "dvmrpN" IPIP tunnel device for a new tunnel VIF by
 * issuing SIOCADDTUNNEL against the "tunl0" master device.  Called in
 * process context (from vif_add under rtnl_lock).  Returns the new,
 * opened device, or NULL on any failure. */
static
struct net_device *ipmr_new_tunnel(struct vifctl *v)
{
	struct net_device  *dev;

	dev = __dev_get_by_name("tunl0");

	if (dev) {
		int err;
		struct ifreq ifr;
		mm_segment_t	oldfs;
		struct ip_tunnel_parm p;
		struct in_device  *in_dev;

		/* Describe the tunnel endpoints from the VIF request. */
		memset(&p, 0, sizeof(p));
		p.iph.daddr = v->vifc_rmt_addr.s_addr;
		p.iph.saddr = v->vifc_lcl_addr.s_addr;
		p.iph.version = 4;
		p.iph.ihl = 5;
		p.iph.protocol = IPPROTO_IPIP;
		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
		ifr.ifr_ifru.ifru_data = (void*)&p;

		/* The tunnel ioctl copies its argument from "user" space,
		 * so temporarily widen the address limit to kernel space. */
		oldfs = get_fs(); set_fs(KERNEL_DS);
		err = dev->do_ioctl(dev, &ifr, SIOCADDTUNNEL);
		set_fs(oldfs);

		dev = NULL;

		if (err == 0 && (dev = __dev_get_by_name(p.name)) != NULL) {
			dev->flags |= IFF_MULTICAST;

			/* Ensure the device has IP configuration and turn
			 * off reverse-path filtering on it. */
			in_dev = __in_dev_get(dev);
			if (in_dev == NULL && (in_dev = inetdev_init(dev)) == NULL)
				goto failure;
			in_dev->cnf.rp_filter = 0;

			if (dev_open(dev))
				goto failure;
		}
	}
	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
164 
165 #ifdef CONFIG_IP_PIMSM
166 
167 static int reg_vif_num = -1;
168 
169 static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
170 {
171         read_lock(&mrt_lock);
172         ((struct net_device_stats*)dev->priv)->tx_bytes += skb->len;
173         ((struct net_device_stats*)dev->priv)->tx_packets++;
174         ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT);
175         read_unlock(&mrt_lock);
176         kfree_skb(skb);
177         return 0;
178 }
179 
180 static struct net_device_stats *reg_vif_get_stats(struct net_device *dev)
181 {
182         return (struct net_device_stats*)dev->priv;
183 }
184 
/* Allocate, register and open the single "pimreg" device used for the
 * PIM register VIF: everything transmitted on it is reported to the
 * daemon (see reg_vif_xmit).  Returns the device or NULL on failure. */
static
struct net_device *ipmr_reg_vif(struct vifctl *v)
{
	struct net_device  *dev;
	struct in_device *in_dev;
	int size;

	/* One allocation holds both the device and its stats block;
	 * dev->priv points just past the net_device. */
	size = sizeof(*dev) + sizeof(struct net_device_stats);
	dev = kmalloc(size, GFP_KERNEL);
	if (!dev)
		return NULL;

	memset(dev, 0, size);

	dev->priv = dev + 1;

	strcpy(dev->name, "pimreg");

	dev->type		= ARPHRD_PIMREG;
	/* Leave room for the outer IP header plus 8 bytes of PIM
	 * register encapsulation. */
	dev->mtu		= 1500 - sizeof(struct iphdr) - 8;
	dev->flags		= IFF_NOARP;
	dev->hard_start_xmit	= reg_vif_xmit;
	dev->get_stats		= reg_vif_get_stats;
	/* Device memory is freed by the core when unregistered. */
	dev->features		|= NETIF_F_DYNALLOC;

	if (register_netdevice(dev)) {
		kfree(dev);
		return NULL;
	}
	dev->iflink = 0;

	if ((in_dev = inetdev_init(dev)) == NULL)
		goto failure;

	/* No reverse-path filtering on the register device. */
	in_dev->cnf.rp_filter = 0;

	if (dev_open(dev))
		goto failure;

	return dev;

failure:
	unregister_netdevice(dev);
	return NULL;
}
230 #endif
231 
232 /*
233  *      Delete a VIF entry
234  */
235  
/* Delete VIF entry @vifi.  Called under rtnl_lock (see the comment on
 * mrt_lock).  Returns -EADDRNOTAVAIL when the index is out of range or
 * the slot is unused. */
static int vif_delete(int vifi)
{
	struct vif_device *v;
	struct net_device *dev;
	struct in_device *in_dev;

	if (vifi < 0 || vifi >= maxvif)
		return -EADDRNOTAVAIL;

	v = &vif_table[vifi];

	/* Clear the slot under the writer lock so readers on the data
	 * path never see a half-torn-down VIF. */
	write_lock_bh(&mrt_lock);
	dev = v->dev;
	v->dev = NULL;

	if (!dev) {
		write_unlock_bh(&mrt_lock);
		return -EADDRNOTAVAIL;
	}

#ifdef CONFIG_IP_PIMSM
	if (vifi == reg_vif_num)
		reg_vif_num = -1;
#endif

	/* If the topmost VIF went away, shrink maxvif down to the
	 * highest remaining in-use slot (+1). */
	if (vifi+1 == maxvif) {
		int tmp;
		for (tmp=vifi-1; tmp>=0; tmp--) {
			if (VIF_EXISTS(tmp))
				break;
		}
		maxvif = tmp+1;
	}

	write_unlock_bh(&mrt_lock);

	dev_set_allmulti(dev, -1);

	if ((in_dev = __in_dev_get(dev)) != NULL) {
		in_dev->cnf.mc_forwarding--;
		ip_rt_multicast_event(in_dev);
	}

	/* Tunnel and register devices were created by this module, so
	 * tear them down too. */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		unregister_netdevice(dev);

	dev_put(dev);	/* drops the reference taken in vif_add() */
	return 0;
}
285 
286 /* Destroy an unresolved cache entry, killing queued skbs
287    and reporting error to netlink readers.
288  */
289 
/* Destroy one unresolved cache entry: queued netlink route requests
 * (flagged by IP version 0, which no real IPv4 header carries) are
 * answered with -ETIMEDOUT; queued data packets are dropped.
 * Called with mfc_unres_lock NOT held by the data path rules here —
 * callers drop it around this (see mroute_clean_tables). */
static void ipmr_destroy_unres(struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	atomic_dec(&cache_resolve_queue_len);

	while((skb=skb_dequeue(&c->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			/* Strip the fake IP header to expose the original
			 * netlink request header, then rewrite it in place
			 * as an error reply. */
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
			nlh->nlmsg_type = NLMSG_ERROR;
			nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
			skb_trim(skb, nlh->nlmsg_len);
			e = NLMSG_DATA(nlh);
			e->error = -ETIMEDOUT;
			memset(&e->msg, 0, sizeof(e->msg));
			netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			kfree_skb(skb);
	}

	kmem_cache_free(mrt_cachep, c);
}
313 
314 
315 /* Single timer process for all the unresolved queue. */
316 
/* Single timer process for all the unresolved queue: drop entries whose
 * deadline passed and re-arm the timer for the earliest survivor. */
void ipmr_expire_process(unsigned long dummy)
{
	unsigned long now;
	unsigned long expires;
	struct mfc_cache *c, **cp;

	/* Runs in timer (BH) context: if the queue lock is contended,
	 * retry a moment later rather than spin. */
	if (!spin_trylock(&mfc_unres_lock)) {
		mod_timer(&ipmr_expire_timer, jiffies+HZ/10);
		return;
	}

	if (atomic_read(&cache_resolve_queue_len) == 0)
		goto out;

	now = jiffies;
	expires = 10*HZ;	/* upper bound on the next re-arm interval */
	cp = &mfc_unres_queue;

	while ((c=*cp) != NULL) {
		long interval = c->mfc_un.unres.expires - now;

		if (interval > 0) {
			/* Still pending: track the earliest deadline. */
			if (interval < expires)
				expires = interval;
			cp = &c->next;
			continue;
		}

		/* Expired: unlink and destroy (reports -ETIMEDOUT). */
		*cp = c->next;

		ipmr_destroy_unres(c);
	}

	/* Re-arm only while unresolved entries remain. */
	if (atomic_read(&cache_resolve_queue_len))
		mod_timer(&ipmr_expire_timer, jiffies + expires);

out:
	spin_unlock(&mfc_unres_lock);
}
356 
357 /* Fill oifs list. It is called under write locked mrt_lock. */
358 
359 static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
360 {
361         int vifi;
362 
363         cache->mfc_un.res.minvif = MAXVIFS;
364         cache->mfc_un.res.maxvif = 0;
365         memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
366 
367         for (vifi=0; vifi<maxvif; vifi++) {
368                 if (VIF_EXISTS(vifi) && ttls[vifi] && ttls[vifi] < 255) {
369                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
370                         if (cache->mfc_un.res.minvif > vifi)
371                                 cache->mfc_un.res.minvif = vifi;
372                         if (cache->mfc_un.res.maxvif <= vifi)
373                                 cache->mfc_un.res.maxvif = vifi + 1;
374                 }
375         }
376 }
377 
/* Install the VIF described by @vifc into vif_table.  Called under
 * rtnl_lock.  @mrtsock is true when the request comes from the live
 * mroute control socket; otherwise the entry is marked VIFF_STATIC so
 * it survives daemon shutdown (see mroute_clean_tables). */
static int vif_add(struct vifctl *vifc, int mrtsock)
{
	int vifi = vifc->vifc_vifi;
	struct vif_device *v = &vif_table[vifi];
	struct net_device *dev;
	struct in_device *in_dev;

	/* Is vif busy ? */
	if (VIF_EXISTS(vifi))
		return -EADDRINUSE;

	switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
	case VIFF_REGISTER:
		/*
		 * Special Purpose VIF in PIM
		 * All the packets will be sent to the daemon
		 */
		if (reg_vif_num >= 0)
			return -EADDRINUSE;
		dev = ipmr_reg_vif(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
#endif
	case VIFF_TUNNEL:	
		dev = ipmr_new_tunnel(vifc);
		if (!dev)
			return -ENOBUFS;
		break;
	case 0:
		dev=ip_dev_find(vifc->vifc_lcl_addr.s_addr);
		if (!dev)
			return -EADDRNOTAVAIL;
		/* Drop ip_dev_find's reference right away; a proper
		 * reference is taken below under mrt_lock (dev_hold). */
		__dev_put(dev);
		break;
	default:
		return -EINVAL;
	}

	if ((in_dev = __in_dev_get(dev)) == NULL)
		return -EADDRNOTAVAIL;
	in_dev->cnf.mc_forwarding++;
	dev_set_allmulti(dev, +1);	/* we want all multicasts on this device */
	ip_rt_multicast_event(in_dev);

	/*
	 *	Fill in the VIF structures
	 */
	v->rate_limit=vifc->vifc_rate_limit;
	v->local=vifc->vifc_lcl_addr.s_addr;
	v->remote=vifc->vifc_rmt_addr.s_addr;
	v->flags=vifc->vifc_flags;
	if (!mrtsock)
		v->flags |= VIFF_STATIC;
	v->threshold=vifc->vifc_threshold;
	v->bytes_in = 0;
	v->bytes_out = 0;
	v->pkt_in = 0;
	v->pkt_out = 0;
	v->link = dev->ifindex;
	/* Tunnel/register VIFs report the underlying device as link. */
	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
		v->link = dev->iflink;

	/* And finish update writing critical data */
	write_lock_bh(&mrt_lock);
	dev_hold(dev);
	v->dev=dev;	/* publishing v->dev makes the VIF visible (VIF_EXISTS) */
#ifdef CONFIG_IP_PIMSM
	if (v->flags&VIFF_REGISTER)
		reg_vif_num = vifi;
#endif
	if (vifi+1 > maxvif)
		maxvif = vifi+1;
	write_unlock_bh(&mrt_lock);
	return 0;
}
455 
456 static struct mfc_cache *ipmr_cache_find(__u32 origin, __u32 mcastgrp)
457 {
458         int line=MFC_HASH(mcastgrp,origin);
459         struct mfc_cache *c;
460 
461         for (c=mfc_cache_array[line]; c; c = c->next) {
462                 if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
463                         break;
464         }
465         return c;
466 }
467 
468 /*
469  *      Allocate a multicast cache entry
470  */
471 static struct mfc_cache *ipmr_cache_alloc(void)
472 {
473         struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_KERNEL);
474         if(c==NULL)
475                 return NULL;
476         memset(c, 0, sizeof(*c));
477         c->mfc_un.res.minvif = MAXVIFS;
478         return c;
479 }
480 
481 static struct mfc_cache *ipmr_cache_alloc_unres(void)
482 {
483         struct mfc_cache *c=kmem_cache_alloc(mrt_cachep, GFP_ATOMIC);
484         if(c==NULL)
485                 return NULL;
486         memset(c, 0, sizeof(*c));
487         skb_queue_head_init(&c->mfc_un.unres.unresolved);
488         c->mfc_un.unres.expires = jiffies + 10*HZ;
489         return c;
490 }
491 
492 /*
493  *      A cache entry has gone into a resolved state from queued
494  */
495  
/*
 *	A cache entry has gone into a resolved state from queued:
 *	replay every skb that was waiting on the unresolved entry @uc
 *	through the freshly resolved entry @c.  Caller frees @uc.
 */
static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
{
	struct sk_buff *skb;
	struct nlmsgerr *e;

	/*
	 *	Play the pending entries through our router
	 */

	while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) {
		if (skb->nh.iph->version == 0) {
			/* Queued netlink route request (fake version-0 IP
			 * header): answer with the resolved route, or with
			 * -EMSGSIZE when it does not fit the skb. */
			int err;
			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
				nlh->nlmsg_len = skb->tail - (u8*)nlh;
			} else {
				nlh->nlmsg_type = NLMSG_ERROR;
				nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
				skb_trim(skb, nlh->nlmsg_len);
				e = NLMSG_DATA(nlh);
				e->error = -EMSGSIZE;
				memset(&e->msg, 0, sizeof(e->msg));
			}
			err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT);
		} else
			/* Ordinary queued data packet: forward it now. */
			ip_mr_forward(skb, c, 0);
	}
}
525 
526 /*
527  *      Bounce a cache query up to mrouted. We could use netlink for this but mrouted
528  *      expects the following bizarre scheme.
529  *
530  *      Called under mrt_lock.
531  */
532  
/*
 *	Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 *	expects the following bizarre scheme.
 *
 *	Called under mrt_lock.
 *
 *	For IGMPMSG_WHOLEPKT (PIM register path) the whole packet is
 *	delivered prefixed with a duplicated outer IP header; otherwise
 *	only the IP header plus a fake IGMP header is sent, with
 *	iph->protocol forced to 0 as the in-band "upcall" marker.
 */
static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert)
{
	struct sk_buff *skb;
	int ihl = pkt->nh.iph->ihl<<2;
	struct igmphdr *igmp;
	struct igmpmsg *msg;
	int ret;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT)
		/* Copy the whole packet, with headroom for the extra
		 * outer IP header pushed below. */
		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
	else
#endif
		skb = alloc_skb(128, GFP_ATOMIC);

	if(!skb)
		return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
	if (assert == IGMPMSG_WHOLEPKT) {
		/* Ugly, but we have no choice with this interface.
		   Duplicate old header, fix ihl, length etc.
		   And all this only to mangle msg->im_msgtype and
		   to set msg->im_mbz to "mbz" :-)
		 */
		msg = (struct igmpmsg*)skb_push(skb, sizeof(struct iphdr));
		skb->nh.raw = skb->h.raw = (u8*)msg;
		memcpy(msg, pkt->nh.raw, sizeof(struct iphdr));
		msg->im_msgtype = IGMPMSG_WHOLEPKT;
		msg->im_mbz = 0;
		msg->im_vif = reg_vif_num;
		skb->nh.iph->ihl = sizeof(struct iphdr) >> 2;
		skb->nh.iph->tot_len = htons(ntohs(pkt->nh.iph->tot_len) + sizeof(struct iphdr));
	} else 
#endif
	{	
		
	/*
	 *	Copy the IP header
	 */

	skb->nh.iph = (struct iphdr *)skb_put(skb, ihl);
	memcpy(skb->data,pkt->data,ihl);
	skb->nh.iph->protocol = 0;			/* Flag to the kernel this is a route add */
	msg = (struct igmpmsg*)skb->nh.iph;	/* igmpmsg overlays the IP header */
	msg->im_vif = vifi;
	skb->dst = dst_clone(pkt->dst);

	/*
	 *	Add our header
	 */

	igmp=(struct igmphdr *)skb_put(skb,sizeof(struct igmphdr));
	igmp->type	=
	msg->im_msgtype = assert;
	igmp->code	=	0;
	skb->nh.iph->tot_len=htons(skb->len);			/* Fix the length */
	skb->h.raw = skb->nh.raw;
	}

	/* No daemon socket: nobody to deliver the report to. */
	if (mroute_socket == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/*
	 *	Deliver to mrouted
	 */
	if ((ret=sock_queue_rcv_skb(mroute_socket,skb))<0) {
		if (net_ratelimit())
			printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
		kfree_skb(skb);
	}

	return ret;
}
609 
610 /*
611  *      Queue a packet for resolution. It gets locked cache entry!
612  */
613  
/*
 *	Queue a packet for resolution. It gets locked cache entry!
 *
 *	Finds (or creates) an unresolved cache entry for the packet's
 *	(S,G), reports a new entry to mrouted as IGMPMSG_NOCACHE, and
 *	parks the skb on the entry's queue (max 4 packets per entry).
 *	Consumes the skb on every path.
 */
static int
ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb)
{
	int err;
	struct mfc_cache *c;

	spin_lock_bh(&mfc_unres_lock);
	/* Already waiting for this (S,G)? */
	for (c=mfc_unres_queue; c; c=c->next) {
		if (c->mfc_mcastgrp == skb->nh.iph->daddr &&
		    c->mfc_origin == skb->nh.iph->saddr)
			break;
	}

	if (c == NULL) {
		/*
		 *	Create a new entry if allowable
		 *	(at most 10 unresolved entries in flight).
		 */

		if (atomic_read(&cache_resolve_queue_len)>=10 ||
		    (c=ipmr_cache_alloc_unres())==NULL) {
			spin_unlock_bh(&mfc_unres_lock);

			kfree_skb(skb);
			return -ENOBUFS;
		}

		/*
		 *	Fill in the new cache entry
		 */
		c->mfc_parent=-1;
		c->mfc_origin=skb->nh.iph->saddr;
		c->mfc_mcastgrp=skb->nh.iph->daddr;

		/*
		 *	Reflect first query at mrouted.
		 */
		if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) {
			/* If the report failed throw the cache entry 
			   out - Brad Parker
			 */
			spin_unlock_bh(&mfc_unres_lock);

			kmem_cache_free(mrt_cachep, c);
			kfree_skb(skb);
			return err;
		}

		atomic_inc(&cache_resolve_queue_len);
		c->next = mfc_unres_queue;
		mfc_unres_queue = c;

		/* Arm the expiry timer for this entry's deadline. */
		mod_timer(&ipmr_expire_timer, c->mfc_un.unres.expires);
	}

	/*
	 *	See if we can append the packet
	 */
	if (c->mfc_un.unres.unresolved.qlen>3) {
		kfree_skb(skb);
		err = -ENOBUFS;
	} else {
		skb_queue_tail(&c->mfc_un.unres.unresolved,skb);
		err = 0;
	}

	spin_unlock_bh(&mfc_unres_lock);
	return err;
}
682 
683 /*
684  *      MFC cache manipulation by user space mroute daemon
685  */
686 
687 int ipmr_mfc_delete(struct mfcctl *mfc)
688 {
689         int line;
690         struct mfc_cache *c, **cp;
691 
692         line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
693 
694         for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
695                 if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
696                     c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
697                         write_lock_bh(&mrt_lock);
698                         *cp = c->next;
699                         write_unlock_bh(&mrt_lock);
700 
701                         kmem_cache_free(mrt_cachep, c);
702                         return 0;
703                 }
704         }
705         return -ENOENT;
706 }
707 
/* Add or update a forwarding-cache entry (MRT_ADD_MFC).  If an
 * unresolved entry for the same (S,G) was queued, it is removed and its
 * pending packets replayed through the new entry.  @mrtsock: request
 * came from the live daemon socket; otherwise mark the entry static. */
int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
{
	int line;
	struct mfc_cache *uc, *c, **cp;

	line=MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

	for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) {
		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
			break;
	}

	/* Existing entry: just refresh parent and TTL thresholds. */
	if (c != NULL) {
		write_lock_bh(&mrt_lock);
		c->mfc_parent = mfc->mfcc_parent;
		ipmr_update_threshoulds(c, mfc->mfcc_ttls);
		if (!mrtsock)
			c->mfc_flags |= MFC_STATIC;
		write_unlock_bh(&mrt_lock);
		return 0;
	}

	if(!MULTICAST(mfc->mfcc_mcastgrp.s_addr))
		return -EINVAL;

	c=ipmr_cache_alloc();
	if (c==NULL)
		return -ENOMEM;

	c->mfc_origin=mfc->mfcc_origin.s_addr;
	c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
	c->mfc_parent=mfc->mfcc_parent;
	ipmr_update_threshoulds(c, mfc->mfcc_ttls);
	if (!mrtsock)
		c->mfc_flags |= MFC_STATIC;

	/* Publish at the head of the hash chain under the writer lock. */
	write_lock_bh(&mrt_lock);
	c->next = mfc_cache_array[line];
	mfc_cache_array[line] = c;
	write_unlock_bh(&mrt_lock);

	/*
	 *	Check to see if we resolved a queued list. If so we
	 *	need to send on the frames and tidy up.
	 */
	spin_lock_bh(&mfc_unres_lock);
	for (cp = &mfc_unres_queue; (uc=*cp) != NULL;
	     cp = &uc->next) {
		if (uc->mfc_origin == c->mfc_origin &&
		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
			*cp = uc->next;
			/* Last unresolved entry gone: stop the expiry timer. */
			if (atomic_dec_and_test(&cache_resolve_queue_len))
				del_timer(&ipmr_expire_timer);
			break;
		}
	}
	spin_unlock_bh(&mfc_unres_lock);

	/* uc is non-NULL only when the loop above broke on a match. */
	if (uc) {
		ipmr_cache_resolve(uc, c);
		kmem_cache_free(mrt_cachep, uc);
	}
	return 0;
}
773 
774 /*
775  *      Close the multicast socket, and clear the vif tables etc
776  */
777  
/*
 *	Close the multicast socket, and clear the vif tables etc:
 *	every VIF and cache entry is flushed except those marked
 *	VIFF_STATIC / MFC_STATIC.  Called under rtnl_lock (from
 *	mrtsock_destruct).
 */
static void mroute_clean_tables(struct sock *sk)
{
	int i;
		
	/*
	 *	Shut down all active vif entries
	 */
	for(i=0; i<maxvif; i++) {
		if (!(vif_table[i].flags&VIFF_STATIC))
			vif_delete(i);
	}

	/*
	 *	Wipe the cache
	 */
	for (i=0;i<MFC_LINES;i++) {
		struct mfc_cache *c, **cp;

		cp = &mfc_cache_array[i];
		while ((c = *cp) != NULL) {
			if (c->mfc_flags&MFC_STATIC) {
				cp = &c->next;
				continue;
			}
			/* Unlink under the writer lock, free outside it. */
			write_lock_bh(&mrt_lock);
			*cp = c->next;
			write_unlock_bh(&mrt_lock);

			kmem_cache_free(mrt_cachep, c);
		}
	}

	/* Kill everything still waiting for resolution.  The spinlock is
	 * dropped around ipmr_destroy_unres() because it delivers netlink
	 * error replies. */
	if (atomic_read(&cache_resolve_queue_len) != 0) {
		struct mfc_cache *c;

		spin_lock_bh(&mfc_unres_lock);
		while (mfc_unres_queue != NULL) {
			c = mfc_unres_queue;
			mfc_unres_queue = c->next;
			spin_unlock_bh(&mfc_unres_lock);

			ipmr_destroy_unres(c);

			spin_lock_bh(&mfc_unres_lock);
		}
		spin_unlock_bh(&mfc_unres_lock);
	}
}
826 
/* Destructor installed via ip_ra_control for the mroute control socket:
 * detaches the socket and flushes all non-static multicast routing
 * state.  Serialized against setsockopt paths by rtnl_lock. */
static void mrtsock_destruct(struct sock *sk)
{
	rtnl_lock();
	if (sk == mroute_socket) {
		/* Mirror of the increment done at MRT_INIT. */
		ipv4_devconf.mc_forwarding--;

		write_lock_bh(&mrt_lock);
		mroute_socket=NULL;
		write_unlock_bh(&mrt_lock);

		mroute_clean_tables(sk);
	}
	rtnl_unlock();
}
841 
842 /*
843  *      Socket options and virtual interface manipulation. The whole
844  *      virtual interface system is a complete heap, but unfortunately
845  *      that's how BSD mrouted happens to think. Maybe one day with a proper
846  *      MOSPF/PIM router set up we can clean this up.
847  */
848  
849 int ip_mroute_setsockopt(struct sock *sk,int optname,char *optval,int optlen)
850 {
851         int ret;
852         struct vifctl vif;
853         struct mfcctl mfc;
854         
855         if(optname!=MRT_INIT)
856         {
857                 if(sk!=mroute_socket && !capable(CAP_NET_ADMIN))
858                         return -EACCES;
859         }
860 
861         switch(optname)
862         {
863                 case MRT_INIT:
864                         if(sk->type!=SOCK_RAW || sk->num!=IPPROTO_IGMP)
865                                 return -EOPNOTSUPP;
866                         if(optlen!=sizeof(int))
867                                 return -ENOPROTOOPT;
868 
869                         rtnl_lock();
870                         if (mroute_socket) {
871                                 rtnl_unlock();
872                                 return -EADDRINUSE;
873                         }
874 
875                         ret = ip_ra_control(sk, 1, mrtsock_destruct);
876                         if (ret == 0) {
877                                 write_lock_bh(&mrt_lock);
878                                 mroute_socket=sk;
879                                 write_unlock_bh(&mrt_lock);
880 
881                                 ipv4_devconf.mc_forwarding++;
882                         }
883                         rtnl_unlock();
884                         return ret;
885                 case MRT_DONE:
886                         if (sk!=mroute_socket)
887                                 return -EACCES;
888                         return ip_ra_control(sk, 0, NULL);
889                 case MRT_ADD_VIF:
890                 case MRT_DEL_VIF:
891                         if(optlen!=sizeof(vif))
892                                 return -EINVAL;
893                         if (copy_from_user(&vif,optval,sizeof(vif)))
894                                 return -EFAULT; 
895                         if(vif.vifc_vifi >= MAXVIFS)
896                                 return -ENFILE;
897                         rtnl_lock();
898                         if (optname==MRT_ADD_VIF) {
899                                 ret = vif_add(&vif, sk==mroute_socket);
900                         } else {
901                                 ret = vif_delete(vif.vifc_vifi);
902                         }
903                         rtnl_unlock();
904                         return ret;
905 
906                 /*
907                  *      Manipulate the forwarding caches. These live
908                  *      in a sort of kernel/user symbiosis.
909                  */
910                 case MRT_ADD_MFC:
911                 case MRT_DEL_MFC:
912                         if(optlen!=sizeof(mfc))
913                                 return -EINVAL;
914                         if (copy_from_user(&mfc,optval, sizeof(mfc)))
915                                 return -EFAULT;
916                         rtnl_lock();
917                         if (optname==MRT_DEL_MFC)
918                                 ret = ipmr_mfc_delete(&mfc);
919                         else
920                                 ret = ipmr_mfc_add(&mfc, sk==mroute_socket);
921                         rtnl_unlock();
922                         return ret;
923                 /*
924                  *      Control PIM assert.
925                  */
926                 case MRT_ASSERT:
927                 {
928                         int v;
929                         if(get_user(v,(int *)optval))
930                                 return -EFAULT;
931                         mroute_do_assert=(v)?1:0;
932                         return 0;
933                 }
934 #ifdef CONFIG_IP_PIMSM
935                 case MRT_PIM:
936                 {
937                         int v;
938                         if(get_user(v,(int *)optval))
939                                 return -EFAULT;
940                         v = (v)?1:0;
941                         rtnl_lock();
942                         if (v != mroute_do_pim) {
943                                 mroute_do_pim = v;
944                                 mroute_do_assert = v;
945 #ifdef CONFIG_IP_PIMSM_V2
946                                 if (mroute_do_pim)
947                                         inet_add_protocol(&pim_protocol);
948                                 else
949                                         inet_del_protocol(&pim_protocol);
950 #endif
951                         }
952                         rtnl_unlock();
953                         return 0;
954                 }
955 #endif
956                 /*
957                  *      Spurious command, or MRT_VERSION which you cannot
958                  *      set.
959                  */
960                 default:
961                         return -ENOPROTOOPT;
962         }
963 }
964 
965 /*
966  *      Getsock opt support for the multicast routing system.
967  */
968  
969 int ip_mroute_getsockopt(struct sock *sk,int optname,char *optval,int *optlen)
970 {
971         int olr;
972         int val;
973 
974         if(optname!=MRT_VERSION && 
975 #ifdef CONFIG_IP_PIMSM
976            optname!=MRT_PIM &&
977 #endif
978            optname!=MRT_ASSERT)
979                 return -ENOPROTOOPT;
980 
981         if (get_user(olr, optlen))
982                 return -EFAULT;
983 
984         olr = min_t(unsigned int, olr, sizeof(int));
985         if (olr < 0)
986                 return -EINVAL;
987                 
988         if(put_user(olr,optlen))
989                 return -EFAULT;
990         if(optname==MRT_VERSION)
991                 val=0x0305;
992 #ifdef CONFIG_IP_PIMSM
993         else if(optname==MRT_PIM)
994                 val=mroute_do_pim;
995 #endif
996         else
997                 val=mroute_do_assert;
998         if(copy_to_user(optval,&val,olr))
999                 return -EFAULT;
1000         return 0;
1001 }
1002 
1003 /*
1004  *      The IP multicast ioctl support routines.
1005  */
1006  
/*
 *	ipmr_ioctl - multicast-routing statistics ioctls.
 *
 *	SIOCGETVIFCNT: copy per-VIF packet/byte counters to user space.
 *	SIOCGETSGCNT:  copy per-(S,G) cache-entry counters to user space.
 *	Returns 0, -EFAULT on a bad user pointer, -EINVAL on a bad VIF
 *	index, -EADDRNOTAVAIL if the VIF/cache entry does not exist, or
 *	-ENOIOCTLCMD for unknown commands.
 */
int ipmr_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	struct sioc_sg_req sr;
	struct sioc_vif_req vr;
	struct vif_device *vif;
	struct mfc_cache *c;
	
	switch(cmd)
	{
		case SIOCGETVIFCNT:
			if (copy_from_user(&vr,(void *)arg,sizeof(vr)))
				return -EFAULT; 
			if(vr.vifi>=maxvif)
				return -EINVAL;
			read_lock(&mrt_lock);
			vif=&vif_table[vr.vifi];
			if(VIF_EXISTS(vr.vifi)) {
				/* Snapshot the counters under the lock... */
				vr.icount=vif->pkt_in;
				vr.ocount=vif->pkt_out;
				vr.ibytes=vif->bytes_in;
				vr.obytes=vif->bytes_out;
				/* ...and drop it before touching user memory. */
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&vr,sizeof(vr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		case SIOCGETSGCNT:
			if (copy_from_user(&sr,(void *)arg,sizeof(sr)))
				return -EFAULT;

			read_lock(&mrt_lock);
			c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr);
			if (c) {
				/* Same pattern: snapshot under the lock,
				   unlock, then copy out. */
				sr.pktcnt = c->mfc_un.res.pkt;
				sr.bytecnt = c->mfc_un.res.bytes;
				sr.wrong_if = c->mfc_un.res.wrong_if;
				read_unlock(&mrt_lock);

				if (copy_to_user((void *)arg,&sr,sizeof(sr)))
					return -EFAULT;
				return 0;
			}
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		default:
			return -ENOIOCTLCMD;
	}
}
1058 
1059 
1060 static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
1061 {
1062         struct vif_device *v;
1063         int ct;
1064         if (event != NETDEV_UNREGISTER)
1065                 return NOTIFY_DONE;
1066         v=&vif_table[0];
1067         for(ct=0;ct<maxvif;ct++,v++) {
1068                 if (v->dev==ptr)
1069                         vif_delete(ct);
1070         }
1071         return NOTIFY_DONE;
1072 }
1073 
1074 
1075 static struct notifier_block ip_mr_notifier={
1076         ipmr_device_event,
1077         NULL,
1078         0
1079 };
1080 
1081 /*
1082  *      Encapsulate a packet by attaching a valid IPIP header to it.
1083  *      This avoids tunnel drivers and other mess and gives us the speed so
1084  *      important for multicast video.
1085  */
1086  
static void ip_encap(struct sk_buff *skb, u32 saddr, u32 daddr)
{
	/* Prepend a fresh outer IPv4 header in front of the current one. */
	struct iphdr *iph = (struct iphdr *)skb_push(skb,sizeof(struct iphdr));

	iph->version	=	4;
	iph->tos	=	skb->nh.iph->tos;	/* inherit TOS from inner header */
	iph->ttl	=	skb->nh.iph->ttl;	/* inherit TTL from inner header */
	iph->frag_off	=	0;
	iph->daddr	=	daddr;
	iph->saddr	=	saddr;
	iph->protocol	=	IPPROTO_IPIP;
	iph->ihl	=	5;			/* 20 bytes, no IP options */
	iph->tot_len	=	htons(skb->len);
	ip_select_ident(iph, skb->dst, NULL);
	ip_send_check(iph);	/* recompute the outer header checksum */

	/* The old network header becomes the transport (IPIP) payload
	   header; the new outer header takes its place.  Any IP options
	   recorded for the inner header no longer apply to the outer one. */
	skb->h.ipiph = skb->nh.iph;
	skb->nh.iph = iph;
	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
	nf_reset(skb);
}
1108 
1109 static inline int ipmr_forward_finish(struct sk_buff *skb)
1110 {
1111         struct ip_options *opt = &(IPCB(skb)->opt);
1112         struct dst_entry *dst = skb->dst;
1113 
1114         if (unlikely(opt->optlen))
1115                 ip_forward_options(skb);
1116 
1117         if (skb->len <= dst->pmtu)
1118                 return dst->output(skb);
1119         else
1120                 return ip_fragment(skb, dst->output);
1121 }
1122 
1123 /*
1124  *      Processing handlers for ipmr_forward
1125  */
1126 
/*
 *	ipmr_queue_xmit - transmit one copy of a multicast packet on VIF
 *	@vifi.  @last is nonzero when this is the final copy, allowing the
 *	original skb to be consumed instead of copied.  Errors are silent:
 *	the copy is simply dropped.
 */
static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c,
			   int vifi, int last)
{
	struct iphdr *iph = skb->nh.iph;
	struct vif_device *vif = &vif_table[vifi];
	struct net_device *dev;
	struct rtable *rt;
	int    encap = 0;
	struct sk_buff *skb2;

	if (vif->dev == NULL)
		return;

#ifdef CONFIG_IP_PIMSM
	if (vif->flags & VIFF_REGISTER) {
		/* Register VIF: do not transmit; account the packet and
		   hand the whole thing to the PIM daemon instead. */
		vif->pkt_out++;
		vif->bytes_out+=skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_bytes += skb->len;
		((struct net_device_stats*)vif->dev->priv)->tx_packets++;
		ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT);
		return;
	}
#endif

	if (vif->flags&VIFF_TUNNEL) {
		/* Route to the tunnel endpoint; reserve room for the
		   outer IPIP header added by ip_encap() below. */
		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
			return;
		encap = sizeof(struct iphdr);
	} else {
		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
			return;
	}

	dev = rt->u.dst.dev;

	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
		/* Do not fragment multicasts. Alas, IPv4 does not
		   allow to send ICMP, so that packets will disappear
		   to blackhole.
		 */

		IP_INC_STATS_BH(IpFragFails);
		ip_rt_put(rt);
		return;
	}

	encap += dev->hard_header_len;

	/* Obtain a private skb to modify: reallocate if headroom is short,
	   the data is shared (cloned), or more copies are still needed;
	   clone if someone else holds a reference; otherwise just take an
	   extra reference on the original. */
	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
	else if (atomic_read(&skb->users) != 1)
		skb2 = skb_clone(skb, GFP_ATOMIC);
	else {
		atomic_inc(&skb->users);
		skb2 = skb;
	}

	if (skb2 == NULL) {
		ip_rt_put(rt);	/* drop the route reference on failure */
		return;
	}

	vif->pkt_out++;
	vif->bytes_out+=skb->len;

	/* Attach the new route; the rt reference is now owned by skb2. */
	dst_release(skb2->dst);
	skb2->dst = &rt->u.dst;
	iph = skb2->nh.iph;
	ip_decrease_ttl(iph);

	/* FIXME: forward and output firewalls used to be called here.
	 * What do we do with netfilter? -- RR */
	if (vif->flags & VIFF_TUNNEL) {
		ip_encap(skb2, vif->local, vif->remote);
		/* FIXME: extra output firewall step used to be here. --RR */
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_packets++;
		((struct ip_tunnel *)vif->dev->priv)->stat.tx_bytes+=skb2->len;
	}

	/* Mark the copy so ip_mr_input() will not forward it again if it
	   loops back to us. */
	IPCB(skb2)->flags |= IPSKB_FORWARDED;

	/*
	 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
	 * not only before forwarding, but after forwarding on all output
	 * interfaces. It is clear, if mrouter runs a multicasting
	 * program, it should receive packets not depending to what interface
	 * program is joined.
	 * If we will not make it, the program will have to join on all
	 * interfaces. On the other hand, multihoming host (or router, but
	 * not mrouter) cannot join to more than one interface - it will
	 * result in receiving multiple packets.
	 */
	NF_HOOK(PF_INET, NF_IP_FORWARD, skb2, skb->dev, dev, 
		ipmr_forward_finish);
}
1222 
1223 int ipmr_find_vif(struct net_device *dev)
1224 {
1225         int ct;
1226         for (ct=maxvif-1; ct>=0; ct--) {
1227                 if (vif_table[ct].dev == dev)
1228                         break;
1229         }
1230         return ct;
1231 }
1232 
1233 /* "local" means that we should preserve one skb (for local delivery) */
1234 
/*
 *	ip_mr_forward - forward a multicast packet according to cache
 *	entry @cache.  @local nonzero means the caller still needs the skb
 *	for local delivery, so it must not be freed here.
 *	Always returns 0.
 */
int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
{
	int psend = -1;
	int vif, ct;

	vif = cache->mfc_parent;
	cache->mfc_un.res.pkt++;
	cache->mfc_un.res.bytes += skb->len;

	/*
	 * Wrong interface: drop packet and (maybe) send PIM assert.
	 */
	if (vif_table[vif].dev != skb->dev) {
		int true_vifi;

		if (((struct rtable*)skb->dst)->key.iif == 0) {
			/* It is our own packet, looped back.
			   Very complicated situation...

			   The best workaround until routing daemons will be
			   fixed is not to redistribute packet, if it was
			   send through wrong interface. It means, that
			   multicast applications WILL NOT work for
			   (S,G), which have default multicast route pointing
			   to wrong oif. In any case, it is not a good
			   idea to use multicasting applications on router.
			 */
			goto dont_forward;
		}

		cache->mfc_un.res.wrong_if++;
		true_vifi = ipmr_find_vif(skb->dev);

		/* Rate-limited (MFC_ASSERT_THRESH) WRONGVIF report to the
		   routing daemon, only when assert mode is on. */
		if (true_vifi >= 0 && mroute_do_assert &&
		    /* pimsm uses asserts, when switching from RPT to SPT,
		       so that we cannot check that packet arrived on an oif.
		       It is bad, but otherwise we would need to move pretty
		       large chunk of pimd to kernel. Ough... --ANK
		     */
		    (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) &&
		    jiffies - cache->mfc_un.res.last_assert > MFC_ASSERT_THRESH) {
			cache->mfc_un.res.last_assert = jiffies;
			ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF);
		}
		goto dont_forward;
	}

	vif_table[vif].pkt_in++;
	vif_table[vif].bytes_in+=skb->len;

	/*
	 *	Forward the frame
	 */
	/* One-step delay trick: 'psend' trails the loop by one matching
	   VIF so that the very last copy can consume the skb itself
	   (last=1) instead of duplicating it, unless a local copy is
	   still needed. */
	for (ct = cache->mfc_un.res.maxvif-1; ct >= cache->mfc_un.res.minvif; ct--) {
		if (skb->nh.iph->ttl > cache->mfc_un.res.ttls[ct]) {
			if (psend != -1)
				ipmr_queue_xmit(skb, cache, psend, 0);
			psend=ct;
		}
	}
	if (psend != -1)
		ipmr_queue_xmit(skb, cache, psend, !local);

dont_forward:
	if (!local)
		kfree_skb(skb);
	return 0;
}
1303 
1304 
1305 /*
1306  *      Multicast packets for forwarding arrive here
1307  */
1308 
/*
 *	ip_mr_input - entry point for multicast packets that may need
 *	forwarding.  Looks up the (S,G) cache; resolved entries are
 *	forwarded via ip_mr_forward(), unresolved ones are queued for the
 *	routing daemon.  Packets for a local group are also delivered
 *	locally.
 */
int ip_mr_input(struct sk_buff *skb)
{
	struct mfc_cache *cache;
	int local = ((struct rtable*)skb->dst)->rt_flags&RTCF_LOCAL;

	/* Packet is looped back after forward, it should not be
	   forwarded second time, but still can be delivered locally.
	 */
	if (IPCB(skb)->flags&IPSKB_FORWARDED)
		goto dont_forward;

	if (!local) {
		    if (IPCB(skb)->opt.router_alert) {
			    /* Router-alert packets go to the RA chain
			       (and are consumed if a handler takes them). */
			    if (ip_call_ra_chain(skb))
				    return 0;
		    } else if (skb->nh.iph->protocol == IPPROTO_IGMP){
			    /* IGMPv1 (and broken IGMPv2 implementations sort of
			       Cisco IOS <= 11.2(8)) do not put router alert
			       option to IGMP packets destined to routable
			       groups. It is very bad, because it means
			       that we can forward NO IGMP messages.
			     */
			    read_lock(&mrt_lock);
			    if (mroute_socket) {
				    raw_rcv(mroute_socket, skb);
				    read_unlock(&mrt_lock);
				    return 0;
			    }
			    read_unlock(&mrt_lock);
		    }
	}

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(skb->nh.iph->saddr, skb->nh.iph->daddr);

	/*
	 *	No usable cache entry
	 */
	if (cache==NULL) {
		int vif;

		if (local) {
			/* Deliver the original locally and keep working
			   on a clone; bail out if cloning failed. */
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			ip_local_deliver(skb);
			if (skb2 == NULL) {
				read_unlock(&mrt_lock);
				return -ENOBUFS;
			}
			skb = skb2;
		}

		vif = ipmr_find_vif(skb->dev);
		if (vif >= 0) {
			/* Queue on the unresolved list and ask the
			   daemon to resolve it. */
			int err = ipmr_cache_unresolved(vif, skb);
			read_unlock(&mrt_lock);

			return err;
		}
		read_unlock(&mrt_lock);
		kfree_skb(skb);
		return -ENODEV;
	}

	ip_mr_forward(skb, cache, local);

	read_unlock(&mrt_lock);

	if (local)
		return ip_local_deliver(skb);

	return 0;

dont_forward:
	if (local)
		return ip_local_deliver(skb);
	kfree_skb(skb);
	return 0;
}
1387 
1388 #ifdef CONFIG_IP_PIMSM_V1
1389 /*
1390  * Handle IGMP messages of PIMv1
1391  */
1392 
/*
 *	pim_rcv_v1 - receive a PIMv1 REGISTER carried in an IGMP message.
 *	Validates the header and the encapsulated IP packet, then re-injects
 *	the inner packet through the register VIF device as if it had
 *	arrived there.  Consumes the skb in all cases.
 */
int pim_rcv_v1(struct sk_buff * skb)
{
	struct igmphdr *pim = (struct igmphdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (skb_is_nonlinear(skb)) {
		/* We index into the header directly below; linearize first. */
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		pim = (struct igmphdr*)skb->h.raw;
	}

	if (!mroute_do_pim ||
	    skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
		kfree_skb(skb);
		return -EINVAL;
	}

	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
	/*
	   Check that:
	   a. packet is really destinted to a multicast group
	   b. packet is not a NULL-REGISTER
	   c. packet is not truncated
	 */
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Pin the register VIF device while we re-inject through it. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Strip the outer headers so the inner IP packet appears to have
	   been received on the register device, then feed it back into
	   the stack. */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	skb->dst = NULL;
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
1457 #endif
1458 
1459 #ifdef CONFIG_IP_PIMSM_V2
/*
 *	pim_rcv - receive a native PIMv2 REGISTER message.
 *	Mirrors pim_rcv_v1(): validate the register header (including its
 *	checksum) and the encapsulated IP packet, then re-inject the inner
 *	packet through the register VIF device.  Consumes the skb.
 */
int pim_rcv(struct sk_buff * skb)
{
	struct pimreghdr *pim = (struct pimreghdr*)skb->h.raw;
	struct iphdr   *encap;
	struct net_device  *reg_dev = NULL;

	if (skb_is_nonlinear(skb)) {
		/* Header fields are accessed directly; linearize first. */
		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
			kfree_skb(skb);
			return -ENOMEM;
		}
		pim = (struct pimreghdr*)skb->h.raw;
	}

	/* Accept only REGISTER messages that are not NULL-REGISTERs and
	   whose checksum verifies over either the header alone or the
	   whole packet. */
	if (skb->len < sizeof(*pim) + sizeof(*encap) ||
	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
	    (pim->flags&PIM_NULL_REGISTER) ||
	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
	     ip_compute_csum((void *)pim, skb->len))) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* check if the inner packet is destined to mcast group */
	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
	if (!MULTICAST(encap->daddr) ||
	    ntohs(encap->tot_len) == 0 ||
	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Pin the register VIF device while we re-inject through it. */
	read_lock(&mrt_lock);
	if (reg_vif_num >= 0)
		reg_dev = vif_table[reg_vif_num].dev;
	if (reg_dev)
		dev_hold(reg_dev);
	read_unlock(&mrt_lock);

	if (reg_dev == NULL) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Strip the outer headers so the inner IP packet appears to have
	   been received on the register device, then feed it back into
	   the stack. */
	skb->mac.raw = skb->nh.raw;
	skb_pull(skb, (u8*)encap - skb->data);
	skb->nh.iph = (struct iphdr *)skb->data;
	skb->dev = reg_dev;
	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
	skb->protocol = htons(ETH_P_IP);
	skb->ip_summed = 0;
	skb->pkt_type = PACKET_HOST;
	dst_release(skb->dst);
	((struct net_device_stats*)reg_dev->priv)->rx_bytes += skb->len;
	((struct net_device_stats*)reg_dev->priv)->rx_packets++;
	skb->dst = NULL;
	nf_reset(skb);
	netif_rx(skb);
	dev_put(reg_dev);
	return 0;
}
1521 #endif
1522 
/*
 *	ipmr_fill_mroute - fill in rtnetlink attributes describing cache
 *	entry @c: the input interface (RTA_IIF) and an RTA_MULTIPATH list
 *	of output interfaces with their TTL thresholds.
 *	Returns 1 on success, -EMSGSIZE (with the skb rolled back) when
 *	the message buffer is too small.
 */
static int
ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
{
	int ct;
	struct rtnexthop *nhp;
	struct net_device *dev = vif_table[c->mfc_parent].dev;
	u8 *b = skb->tail;	/* rollback point for rtattr_failure */
	struct rtattr *mp_head;

	if (dev)
		RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);

	/* Reserve the multipath attribute header now; its length is
	   patched once all nexthops have been appended. */
	mp_head = (struct rtattr*)skb_put(skb, RTA_LENGTH(0));

	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
		/* TTL 255 marks a VIF that is not an output for this entry. */
		if (c->mfc_un.res.ttls[ct] < 255) {
			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
				goto rtattr_failure;
			nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
			nhp->rtnh_flags = 0;
			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
			nhp->rtnh_ifindex = vif_table[ct].dev->ifindex;
			nhp->rtnh_len = sizeof(*nhp);
		}
	}
	mp_head->rta_type = RTA_MULTIPATH;
	mp_head->rta_len = skb->tail - (u8*)mp_head;
	rtm->rtm_type = RTN_MULTICAST;
	return 1;

rtattr_failure:
	/* Undo every attribute added by this function. */
	skb_trim(skb, b - skb->data);
	return -EMSGSIZE;
}
1557 
/*
 *	ipmr_get_route - rtnetlink route-get support for multicast routes.
 *	On a cache hit, fills @skb via ipmr_fill_mroute().  On a miss,
 *	either returns -EAGAIN (@nowait) or fabricates a minimal IP header
 *	on the skb and queues it as an unresolved entry for the daemon.
 */
int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
	int err;
	struct mfc_cache *cache;
	struct rtable *rt = (struct rtable*)skb->dst;

	read_lock(&mrt_lock);
	cache = ipmr_cache_find(rt->rt_src, rt->rt_dst);

	if (cache==NULL) {
		struct net_device *dev;
		int vif;

		if (nowait) {
			read_unlock(&mrt_lock);
			return -EAGAIN;
		}

		dev = skb->dev;
		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
			read_unlock(&mrt_lock);
			return -ENODEV;
		}
		/* Build a fake IP header so the unresolved-queue machinery
		   can extract (src, dst); version 0 marks it as synthetic. */
		skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
		skb->nh.iph->ihl = sizeof(struct iphdr)>>2;
		skb->nh.iph->saddr = rt->rt_src;
		skb->nh.iph->daddr = rt->rt_dst;
		skb->nh.iph->version = 0;
		err = ipmr_cache_unresolved(vif, skb);
		read_unlock(&mrt_lock);
		return err;
	}

	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
		cache->mfc_flags |= MFC_NOTIFY;
	err = ipmr_fill_mroute(skb, cache, rtm);
	read_unlock(&mrt_lock);
	return err;
}
1597 
1598 #ifdef CONFIG_PROC_FS   
1599 /*
1600  *      The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif
1601  */
1602  
/*
 *	/proc/net/ip_mr_vif read handler (classic get_info interface):
 *	print one line per active VIF.  The pos/begin/offset arithmetic
 *	implements the old procfs windowing protocol, returning only the
 *	slice of output the reader asked for.
 */
static int ipmr_vif_info(char *buffer, char **start, off_t offset, int length)
{
	struct vif_device *vif;
	int len=0;		/* bytes currently held in buffer */
	off_t pos=0;		/* total bytes generated so far */
	off_t begin=0;		/* file offset of buffer[0] */
	int size;
	int ct;

	len += sprintf(buffer,
		 "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags Local    Remote\n");
	pos=len;
  
	read_lock(&mrt_lock);
	for (ct=0;ct<maxvif;ct++) 
	{
		char *name = "none";
		vif=&vif_table[ct];
		if(!VIF_EXISTS(ct))
			continue;
		if (vif->dev)
			name = vif->dev->name;
		size = sprintf(buffer+len, "%2d %-10s %8ld %7ld  %8ld %7ld %05X %08X %08X\n",
			ct, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out,
			vif->flags, vif->local, vif->remote);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			/* Still before the requested window: discard what
			   we have and remember where the buffer restarts. */
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;	/* window filled; stop generating */
	}
	read_unlock(&mrt_lock);
	
	/* Trim the buffer to exactly the requested window. */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len<0)
		len = 0;
	return len;
}
1648 
/*
 *	/proc/net/ip_mr_cache read handler: one line per MFC cache entry,
 *	resolved entries first (with their per-VIF TTL map), then the
 *	unresolved queue (iif -1, queue length in the Pkts column).
 *	Uses the same procfs windowing protocol as ipmr_vif_info().
 */
static int ipmr_mfc_info(char *buffer, char **start, off_t offset, int length)
{
	struct mfc_cache *mfc;
	int len=0;		/* bytes currently held in buffer */
	off_t pos=0;		/* total bytes generated so far */
	off_t begin=0;		/* file offset of buffer[0] */
	int size;
	int ct;

	len += sprintf(buffer,
		 "Group    Origin   Iif     Pkts    Bytes    Wrong Oifs\n");
	pos=len;

	read_lock(&mrt_lock);
	for (ct=0;ct<MFC_LINES;ct++) 
	{
		for(mfc=mfc_cache_array[ct]; mfc; mfc=mfc->next)
		{
			int n;

			/*
			 *	Interface forwarding map
			 */
			size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld",
				(unsigned long)mfc->mfc_mcastgrp,
				(unsigned long)mfc->mfc_origin,
				mfc->mfc_parent,
				mfc->mfc_un.res.pkt,
				mfc->mfc_un.res.bytes,
				mfc->mfc_un.res.wrong_if);
			for(n=mfc->mfc_un.res.minvif;n<mfc->mfc_un.res.maxvif;n++)
			{
				/* TTL 255 marks a VIF that is not an
				   output for this entry. */
				if(VIF_EXISTS(n) && mfc->mfc_un.res.ttls[n] < 255)
					size += sprintf(buffer+len+size, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]);
			}
			size += sprintf(buffer+len+size, "\n");
			len+=size;
			pos+=size;
			if(pos<offset)
			{
				/* Before the requested window: reset buffer. */
				len=0;
				begin=pos;
			}
			if(pos>offset+length)
				goto done;	/* skip the unresolved queue too */
		}
	}

	/* Unresolved entries live on their own queue under a separate lock. */
	spin_lock_bh(&mfc_unres_lock);
	for(mfc=mfc_unres_queue; mfc; mfc=mfc->next) {
		size = sprintf(buffer+len, "%08lX %08lX %-3d %8ld %8ld %8ld\n",
			       (unsigned long)mfc->mfc_mcastgrp,
			       (unsigned long)mfc->mfc_origin,
			       -1,
				(long)mfc->mfc_un.unres.unresolved.qlen,
				0L, 0L);
		len+=size;
		pos+=size;
		if(pos<offset)
		{
			len=0;
			begin=pos;
		}
		if(pos>offset+length)
			break;
	}
	spin_unlock_bh(&mfc_unres_lock);

done:
	read_unlock(&mrt_lock);
	/* Trim the buffer to exactly the requested window. */
	*start=buffer+(offset-begin);
	len-=(offset-begin);
	if(len>length)
		len=length;
	if (len < 0) {
		len = 0;
	}
	return len;
}
1728 
1729 #endif  
1730 
#ifdef CONFIG_IP_PIMSM_V2
/*
 * Protocol handler registration for PIM-SM version 2.
 *
 * Delivers inbound IPPROTO_PIM datagrams to pim_rcv().  Uses the
 * old positional initializer style for struct inet_protocol, so the
 * per-field meaning is given by the trailing comments below.
 * NOTE(review): field order assumed to match the struct inet_protocol
 * declaration of this kernel generation (handler, err_handler, next,
 * protocol, copy, data, name) -- confirm against include/net/protocol.h.
 */
struct inet_protocol pim_protocol = 
{
        pim_rcv,                /* PIM handler          */
        NULL,                   /* PIM error control    */
        NULL,                   /* next                 */
        IPPROTO_PIM,            /* protocol ID          */
        0,                      /* copy                 */
        NULL,                   /* data                 */
        "PIM"                   /* name                 */
};
#endif
1743 
1744 
1745 /*
1746  *      Setup for IP multicast routing
1747  */
1748  
1749 void __init ip_mr_init(void)
1750 {
1751         printk(KERN_INFO "Linux IP multicast router 0.06 plus PIM-SM\n");
1752         mrt_cachep = kmem_cache_create("ip_mrt_cache",
1753                                        sizeof(struct mfc_cache),
1754                                        0, SLAB_HWCACHE_ALIGN,
1755                                        NULL, NULL);
1756         init_timer(&ipmr_expire_timer);
1757         ipmr_expire_timer.function=ipmr_expire_process;
1758         register_netdevice_notifier(&ip_mr_notifier);
1759 #ifdef CONFIG_PROC_FS   
1760         proc_net_create("ip_mr_vif",0,ipmr_vif_info);
1761         proc_net_create("ip_mr_cache",0,ipmr_mfc_info);
1762 #endif  
1763 }
1764 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp