~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv6/ip6_output.c

Version: ~ [ linux-5.15-rc7 ] ~ [ linux-5.14.14 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.75 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.155 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.213 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.252 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.287 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.289 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      IPv6 output functions
  3  *      Linux INET6 implementation
  4  *
  5  *      Authors:
  6  *      Pedro Roque             <roque@di.fc.ul.pt>
  7  *
  8  *      Based on linux/net/ipv4/ip_output.c
  9  *
 10  *      This program is free software; you can redistribute it and/or
 11  *      modify it under the terms of the GNU General Public License
 12  *      as published by the Free Software Foundation; either version
 13  *      2 of the License, or (at your option) any later version.
 14  *
 15  *      Changes:
 16  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
 17  *                              extension headers are implemented.
 18  *                              route changes now work.
 19  *                              ip6_forward does not confuse sniffers.
 20  *                              etc.
 21  *
 22  *      H. von Brand    :       Added missing #include <linux/string.h>
 23  *      Imran Patel     :       frag id should be in NBO
 24  *      Kazunori MIYAZAWA @USAGI
 25  *                      :       add ip6_append_data and related functions
 26  *                              for datagram xmit
 27  */
 28 
 29 #include <linux/errno.h>
 30 #include <linux/kernel.h>
 31 #include <linux/string.h>
 32 #include <linux/socket.h>
 33 #include <linux/net.h>
 34 #include <linux/netdevice.h>
 35 #include <linux/if_arp.h>
 36 #include <linux/in6.h>
 37 #include <linux/tcp.h>
 38 #include <linux/route.h>
 39 #include <linux/module.h>
 40 
 41 #include <linux/netfilter.h>
 42 #include <linux/netfilter_ipv6.h>
 43 
 44 #include <net/sock.h>
 45 #include <net/snmp.h>
 46 
 47 #include <net/ipv6.h>
 48 #include <net/ndisc.h>
 49 #include <net/protocol.h>
 50 #include <net/ip6_route.h>
 51 #include <net/addrconf.h>
 52 #include <net/rawv6.h>
 53 #include <net/icmp.h>
 54 #include <net/xfrm.h>
 55 #include <net/checksum.h>
 56 #include <linux/mroute6.h>
 57 
 58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
 59 
 60 int __ip6_local_out(struct sk_buff *skb)
 61 {
 62         int len;
 63 
 64         len = skb->len - sizeof(struct ipv6hdr);
 65         if (len > IPV6_MAXPLEN)
 66                 len = 0;
 67         ipv6_hdr(skb)->payload_len = htons(len);
 68 
 69         return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
 70                        dst_output);
 71 }
 72 
 73 int ip6_local_out(struct sk_buff *skb)
 74 {
 75         int err;
 76 
 77         err = __ip6_local_out(skb);
 78         if (likely(err == 1))
 79                 err = dst_output(skb);
 80 
 81         return err;
 82 }
 83 EXPORT_SYMBOL_GPL(ip6_local_out);
 84 
 85 static int ip6_output_finish(struct sk_buff *skb)
 86 {
 87         struct dst_entry *dst = skb_dst(skb);
 88 
 89         if (dst->hh)
 90                 return neigh_hh_output(dst->hh, skb);
 91         else if (dst->neighbour)
 92                 return dst->neighbour->output(skb);
 93 
 94         IP6_INC_STATS(dev_net(dst->dev),
 95                       ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
 96         kfree_skb(skb);
 97         return -EINVAL;
 98 
 99 }
100 
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
103 {
104         skb_reset_mac_header(newskb);
105         __skb_pull(newskb, skb_network_offset(newskb));
106         newskb->pkt_type = PACKET_LOOPBACK;
107         newskb->ip_summed = CHECKSUM_UNNECESSARY;
108         WARN_ON(!skb_dst(newskb));
109 
110         netif_rx(newskb);
111         return 0;
112 }
113 
114 
/*
 * Second-stage output: loop back multicast copies where required, then
 * run the POST_ROUTING netfilter hook before ip6_output_finish().
 */
static int ip6_output2(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		/* Loop a copy back to local listeners unless the socket
		 * disabled multicast loopback (np->mc_loop) or we are on
		 * the loopback device already.  A copy is needed when a
		 * multicast-router socket may want the packet, or when
		 * this host has joined the destination group on @dev.
		 */
		if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
		    ((mroute6_socket(dev_net(dev)) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
					NULL, newskb->dev,
					ip6_dev_loopback_xmit);

			/* Hop limit 0: the sender wanted local delivery
			 * only, so the looped-back clone suffices and the
			 * original must not hit the wire.
			 */
			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(dev_net(dev), idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
				skb->len);
	}

	return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
		       ip6_output_finish);
}
157 
158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159 {
160         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161 
162         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
163                skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
164 }
165 
166 int ip6_output(struct sk_buff *skb)
167 {
168         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169         if (unlikely(idev->cnf.disable_ipv6)) {
170                 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
171                               IPSTATS_MIB_OUTDISCARDS);
172                 kfree_skb(skb);
173                 return 0;
174         }
175 
176         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177                                 dst_allfrag(skb_dst(skb)))
178                 return ip6_fragment(skb, ip6_output2);
179         else
180                 return ip6_output2(skb);
181 }
182 
183 /*
184  *      xmit an sk_buff (used by TCP)
185  */
186 
/*
 *	Transmit an sk_buff as an IPv6 packet (used by TCP): build any
 *	extension headers from @opt, prepend the fixed IPv6 header and
 *	pass the result through the LOCAL_OUT netfilter hook.
 *
 *	@sk:       sending socket (supplies hop limit, tclass, priority, mark)
 *	@skb:      payload; headroom is reallocated here if @opt needs more
 *	@fl:       flow describing source/destination and flow label
 *	@opt:      optional IPv6 extension headers, may be NULL
 *	@ipfragok: non-zero allows local fragmentation of the packet
 *
 *	Returns the netfilter/dst_output verdict, -ENOBUFS if headroom
 *	reallocation fails, or -EMSGSIZE when the packet exceeds the path
 *	MTU and may not be fragmented.
 */
int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
	     struct ipv6_txoptions *opt, int ipfragok)
{
	struct net *net = sock_net(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl->fl6_dst;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8  proto = fl->proto;
	int seg_len = skb->len;
	int hlimit = -1;
	int tclass = 0;
	u32 mtu;

	if (opt) {
		unsigned int head_room;

		/* First: exthdrs may take lots of space (~8K for now)
		   MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (skb2 == NULL) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			kfree_skb(skb);
			skb = skb2;
			/* Keep socket write-memory accounting on the new skb. */
			if (sk)
				skb_set_owner_w(skb, sk);
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			/* May rewrite first_hop (e.g. via a routing header). */
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* Allow local fragmentation. */
	if (ipfragok)
		skb->local_df = 1;

	/*
	 *	Fill in the IPv6 header
	 */
	if (np) {
		tclass = np->tclass;
		hlimit = np->hop_limit;
	}
	/* Socket gave no hop limit: fall back to the route's default. */
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	/* First 32 bits: version (6), traffic class, caller's flow label. */
	*(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, first_hop);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);
		return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
				dst_output);
	}

	/* Too big and fragmentation not allowed: report PKT_TOOBIG to
	 * ourselves so the upper layer can shrink its segments.
	 */
	if (net_ratelimit())
		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
	skb->dev = dst->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);
278 
279 /*
280  *      To avoid extra problems ND packets are sent through this
281  *      routine. It's code duplication but I really want to avoid
282  *      extra checks since ipv6_build_header is used by TCP (which
283  *      is for us performance critical)
284  */
285 
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287                const struct in6_addr *saddr, const struct in6_addr *daddr,
288                int proto, int len)
289 {
290         struct ipv6_pinfo *np = inet6_sk(sk);
291         struct ipv6hdr *hdr;
292         int totlen;
293 
294         skb->protocol = htons(ETH_P_IPV6);
295         skb->dev = dev;
296 
297         totlen = len + sizeof(struct ipv6hdr);
298 
299         skb_reset_network_header(skb);
300         skb_put(skb, sizeof(struct ipv6hdr));
301         hdr = ipv6_hdr(skb);
302 
303         *(__be32*)hdr = htonl(0x60000000);
304 
305         hdr->payload_len = htons(len);
306         hdr->nexthdr = proto;
307         hdr->hop_limit = np->hop_limit;
308 
309         ipv6_addr_copy(&hdr->saddr, saddr);
310         ipv6_addr_copy(&hdr->daddr, daddr);
311 
312         return 0;
313 }
314 
315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 {
317         struct ip6_ra_chain *ra;
318         struct sock *last = NULL;
319 
320         read_lock(&ip6_ra_lock);
321         for (ra = ip6_ra_chain; ra; ra = ra->next) {
322                 struct sock *sk = ra->sk;
323                 if (sk && ra->sel == sel &&
324                     (!sk->sk_bound_dev_if ||
325                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
326                         if (last) {
327                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328                                 if (skb2)
329                                         rawv6_rcv(last, skb2);
330                         }
331                         last = sk;
332                 }
333         }
334 
335         if (last) {
336                 rawv6_rcv(last, skb);
337                 read_unlock(&ip6_ra_lock);
338                 return 1;
339         }
340         read_unlock(&ip6_ra_lock);
341         return 0;
342 }
343 
/*
 * Decide what a proxying router should do with a packet addressed to a
 * proxied neighbour (pneigh entry).
 *
 * Returns:
 *   1  - packet is a unicast neighbour-discovery message and must be
 *        handed to the local input path,
 *   0  - packet may be forwarded normally,
 *  -1  - packet must be dropped: a link-local destination cannot be
 *        proxied (dst_link_failure() has signalled the sender).
 */
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	int offset;

	/* Locate the transport header behind any extension headers. */
	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		/* Ensure at least the ICMPv6 type octet is in linear data. */
		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
394 
/* Final step of the forward path: emit the packet via its dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
399 
400 int ip6_forward(struct sk_buff *skb)
401 {
402         struct dst_entry *dst = skb_dst(skb);
403         struct ipv6hdr *hdr = ipv6_hdr(skb);
404         struct inet6_skb_parm *opt = IP6CB(skb);
405         struct net *net = dev_net(dst->dev);
406 
407         if (net->ipv6.devconf_all->forwarding == 0)
408                 goto error;
409 
410         if (skb_warn_if_lro(skb))
411                 goto drop;
412 
413         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
414                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
415                 goto drop;
416         }
417 
418         skb_forward_csum(skb);
419 
420         /*
421          *      We DO NOT make any processing on
422          *      RA packets, pushing them to user level AS IS
423          *      without ane WARRANTY that application will be able
424          *      to interpret them. The reason is that we
425          *      cannot make anything clever here.
426          *
427          *      We are not end-node, so that if packet contains
428          *      AH/ESP, we cannot make anything.
429          *      Defragmentation also would be mistake, RA packets
430          *      cannot be fragmented, because there is no warranty
431          *      that different fragments will go along one path. --ANK
432          */
433         if (opt->ra) {
434                 u8 *ptr = skb_network_header(skb) + opt->ra;
435                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436                         return 0;
437         }
438 
439         /*
440          *      check and decrement ttl
441          */
442         if (hdr->hop_limit <= 1) {
443                 /* Force OUTPUT device used as source address */
444                 skb->dev = dst->dev;
445                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
446                             0, skb->dev);
447                 IP6_INC_STATS_BH(net,
448                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
449 
450                 kfree_skb(skb);
451                 return -ETIMEDOUT;
452         }
453 
454         /* XXX: idev->cnf.proxy_ndp? */
455         if (net->ipv6.devconf_all->proxy_ndp &&
456             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
457                 int proxied = ip6_forward_proxy_check(skb);
458                 if (proxied > 0)
459                         return ip6_input(skb);
460                 else if (proxied < 0) {
461                         IP6_INC_STATS(net, ip6_dst_idev(dst),
462                                       IPSTATS_MIB_INDISCARDS);
463                         goto drop;
464                 }
465         }
466 
467         if (!xfrm6_route_forward(skb)) {
468                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
469                 goto drop;
470         }
471         dst = skb_dst(skb);
472 
473         /* IPv6 specs say nothing about it, but it is clear that we cannot
474            send redirects to source routed frames.
475            We don't send redirects to frames decapsulated from IPsec.
476          */
477         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
478             !skb_sec_path(skb)) {
479                 struct in6_addr *target = NULL;
480                 struct rt6_info *rt;
481                 struct neighbour *n = dst->neighbour;
482 
483                 /*
484                  *      incoming and outgoing devices are the same
485                  *      send a redirect.
486                  */
487 
488                 rt = (struct rt6_info *) dst;
489                 if ((rt->rt6i_flags & RTF_GATEWAY))
490                         target = (struct in6_addr*)&n->primary_key;
491                 else
492                         target = &hdr->daddr;
493 
494                 /* Limit redirects both by destination (here)
495                    and by source (inside ndisc_send_redirect)
496                  */
497                 if (xrlim_allow(dst, 1*HZ))
498                         ndisc_send_redirect(skb, n, target);
499         } else {
500                 int addrtype = ipv6_addr_type(&hdr->saddr);
501 
502                 /* This check is security critical. */
503                 if (addrtype == IPV6_ADDR_ANY ||
504                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
505                         goto error;
506                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
507                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
508                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
509                         goto error;
510                 }
511         }
512 
513         if (skb->len > dst_mtu(dst) && !skb_is_gso(skb)) {
514                 /* Again, force OUTPUT device used as source address */
515                 skb->dev = dst->dev;
516                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
517                 IP6_INC_STATS_BH(net,
518                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
519                 IP6_INC_STATS_BH(net,
520                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
521                 kfree_skb(skb);
522                 return -EMSGSIZE;
523         }
524 
525         if (skb_cow(skb, dst->dev->hard_header_len)) {
526                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
527                 goto drop;
528         }
529 
530         hdr = ipv6_hdr(skb);
531 
532         /* Mangling hops number delayed to point after skb COW */
533 
534         hdr->hop_limit--;
535 
536         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
537         return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
538                        ip6_forward_finish);
539 
540 error:
541         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
542 drop:
543         kfree_skb(skb);
544         return -EINVAL;
545 }
546 
/* Copy the metadata a fragment must inherit from its parent skb:
 * packet type, priority, protocol, dst reference, device, mark,
 * traffic-control index, netfilter state and security mark.
 */
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	/* Replace any dst on 'to' with a fresh reference to 'from's dst. */
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
	to->nf_trace = from->nf_trace;
#endif
	skb_copy_secmark(to, from);
}
567 
/*
 * Find the offset (from the start of the IPv6 header) at which a
 * Fragment header must be inserted: after the per-fragment extension
 * headers (Hop-by-Hop, Routing, and a Destination Options header that
 * precedes a Routing header), before everything else.  On return,
 * *nexthdr points at the next-header byte the caller will overwrite
 * with NEXTHDR_FRAGMENT.
 *
 * NOTE(review): the bound 'offset + 1 <= packet_len' does not cover
 * the full extension-header read done by ipv6_optlen(); later upstream
 * kernels rewrote this loop to validate each header length before
 * dereferencing it (CVE-2017-9074).  Confirm all callers pass only
 * locally built, well-formed header chains.
 */
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
	u16 offset = sizeof(struct ipv6hdr);
	struct ipv6_opt_hdr *exthdr =
				(struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
	unsigned int packet_len = skb->tail - skb->network_header;
	int found_rhdr = 0;
	*nexthdr = &ipv6_hdr(skb)->nexthdr;

	while (offset + 1 <= packet_len) {

		switch (**nexthdr) {

		case NEXTHDR_HOP:
			break;
		case NEXTHDR_ROUTING:
			found_rhdr = 1;
			break;
		case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
			/* A Home Address option must travel with the
			 * routing header in every fragment. */
			if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
				break;
#endif
			if (found_rhdr)
				return offset;
			break;
		default :
			return offset;
		}

		offset += ipv6_optlen(exthdr);
		*nexthdr = &exthdr->nexthdr;
		exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
						 offset);
	}

	return offset;
}
606 
/* Per-boot random seed mixed into the fragment-ID hash below. */
static u32 hashidentrnd __read_mostly;
#define FID_HASH_SZ 16
/* One independent fragment-identification counter per hash bucket. */
static u32 ipv6_fragmentation_id[FID_HASH_SZ];

/* Seed the fragment-ID hash once at boot. */
void __init initialize_hashidentrnd(void)
{
	get_random_bytes(&hashidentrnd, sizeof(hashidentrnd));
}
615 
616 static u32 __ipv6_select_ident(const struct in6_addr *addr)
617 {
618         u32 newid, oldid, hash = jhash2((u32 *)addr, 4, hashidentrnd);
619         u32 *pid = &ipv6_fragmentation_id[hash % FID_HASH_SZ];
620 
621         do {
622                 oldid = *pid;
623                 newid = oldid + 1;
624                 if (!(hash + newid))
625                         newid++;
626         } while (cmpxchg(pid, oldid, newid) != oldid);
627 
628         return hash + newid;
629 }
630 
631 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
632 {
633         fhdr->identification = htonl(__ipv6_select_ident(&rt->rt6i_dst.addr));
634 }
635 
636 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
637 {
638         struct sk_buff *frag;
639         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
640         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
641         struct ipv6hdr *tmp_hdr;
642         struct frag_hdr *fh;
643         unsigned int mtu, hlen, left, len;
644         __be32 frag_id = 0;
645         int ptr, offset = 0, err=0;
646         u8 *prevhdr, nexthdr = 0;
647         struct net *net = dev_net(skb_dst(skb)->dev);
648 
649         hlen = ip6_find_1stfragopt(skb, &prevhdr);
650         nexthdr = *prevhdr;
651 
652         mtu = ip6_skb_dst_mtu(skb);
653 
654         /* We must not fragment if the socket is set to force MTU discovery
655          * or if the skb it not generated by a local socket.  (This last
656          * check should be redundant, but it's free.)
657          */
658         if (!skb->local_df) {
659                 skb->dev = skb_dst(skb)->dev;
660                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
661                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
662                               IPSTATS_MIB_FRAGFAILS);
663                 kfree_skb(skb);
664                 return -EMSGSIZE;
665         }
666 
667         if (np && np->frag_size < mtu) {
668                 if (np->frag_size)
669                         mtu = np->frag_size;
670         }
671         mtu -= hlen + sizeof(struct frag_hdr);
672 
673         if (skb_has_frags(skb)) {
674                 int first_len = skb_pagelen(skb);
675                 struct sk_buff *frag2;
676 
677                 if (first_len - hlen > mtu ||
678                     ((first_len - hlen) & 7) ||
679                     skb_cloned(skb))
680                         goto slow_path;
681 
682                 skb_walk_frags(skb, frag) {
683                         /* Correct geometry. */
684                         if (frag->len > mtu ||
685                             ((frag->len & 7) && frag->next) ||
686                             skb_headroom(frag) < hlen)
687                                 goto slow_path_clean;
688 
689                         /* Partially cloned skb? */
690                         if (skb_shared(frag))
691                                 goto slow_path_clean;
692 
693                         BUG_ON(frag->sk);
694                         if (skb->sk) {
695                                 frag->sk = skb->sk;
696                                 frag->destructor = sock_wfree;
697                         }
698                         skb->truesize -= frag->truesize;
699                 }
700 
701                 err = 0;
702                 offset = 0;
703                 frag = skb_shinfo(skb)->frag_list;
704                 skb_frag_list_init(skb);
705                 /* BUILD HEADER */
706 
707                 *prevhdr = NEXTHDR_FRAGMENT;
708                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
709                 if (!tmp_hdr) {
710                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
711                                       IPSTATS_MIB_FRAGFAILS);
712                         return -ENOMEM;
713                 }
714 
715                 __skb_pull(skb, hlen);
716                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
717                 __skb_push(skb, hlen);
718                 skb_reset_network_header(skb);
719                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
720 
721                 ipv6_select_ident(fh, rt);
722                 fh->nexthdr = nexthdr;
723                 fh->reserved = 0;
724                 fh->frag_off = htons(IP6_MF);
725                 frag_id = fh->identification;
726 
727                 first_len = skb_pagelen(skb);
728                 skb->data_len = first_len - skb_headlen(skb);
729                 skb->len = first_len;
730                 ipv6_hdr(skb)->payload_len = htons(first_len -
731                                                    sizeof(struct ipv6hdr));
732 
733                 dst_hold(&rt->u.dst);
734 
735                 for (;;) {
736                         /* Prepare header of the next frame,
737                          * before previous one went down. */
738                         if (frag) {
739                                 frag->ip_summed = CHECKSUM_NONE;
740                                 skb_reset_transport_header(frag);
741                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
742                                 __skb_push(frag, hlen);
743                                 skb_reset_network_header(frag);
744                                 memcpy(skb_network_header(frag), tmp_hdr,
745                                        hlen);
746                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
747                                 fh->nexthdr = nexthdr;
748                                 fh->reserved = 0;
749                                 fh->frag_off = htons(offset);
750                                 if (frag->next != NULL)
751                                         fh->frag_off |= htons(IP6_MF);
752                                 fh->identification = frag_id;
753                                 ipv6_hdr(frag)->payload_len =
754                                                 htons(frag->len -
755                                                       sizeof(struct ipv6hdr));
756                                 ip6_copy_metadata(frag, skb);
757                         }
758 
759                         err = output(skb);
760                         if(!err)
761                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
762                                               IPSTATS_MIB_FRAGCREATES);
763 
764                         if (err || !frag)
765                                 break;
766 
767                         skb = frag;
768                         frag = skb->next;
769                         skb->next = NULL;
770                 }
771 
772                 kfree(tmp_hdr);
773 
774                 if (err == 0) {
775                         IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
776                                       IPSTATS_MIB_FRAGOKS);
777                         dst_release(&rt->u.dst);
778                         return 0;
779                 }
780 
781                 while (frag) {
782                         skb = frag->next;
783                         kfree_skb(frag);
784                         frag = skb;
785                 }
786 
787                 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
788                               IPSTATS_MIB_FRAGFAILS);
789                 dst_release(&rt->u.dst);
790                 return err;
791 
792 slow_path_clean:
793                 skb_walk_frags(skb, frag2) {
794                         if (frag2 == frag)
795                                 break;
796                         frag2->sk = NULL;
797                         frag2->destructor = NULL;
798                         skb->truesize += frag2->truesize;
799                 }
800         }
801 
802 slow_path:
803         left = skb->len - hlen;         /* Space per frame */
804         ptr = hlen;                     /* Where to start from */
805 
806         /*
807          *      Fragment the datagram.
808          */
809 
810         *prevhdr = NEXTHDR_FRAGMENT;
811 
812         /*
813          *      Keep copying data until we run out.
814          */
815         while(left > 0) {
816                 len = left;
817                 /* IF: it doesn't fit, use 'mtu' - the data space left */
818                 if (len > mtu)
819                         len = mtu;
820                 /* IF: we are not sending upto and including the packet end
821                    then align the next start on an eight byte boundary */
822                 if (len < left) {
823                         len &= ~7;
824                 }
825                 /*
826                  *      Allocate buffer.
827                  */
828 
829                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
830                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
831                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
832                                       IPSTATS_MIB_FRAGFAILS);
833                         err = -ENOMEM;
834                         goto fail;
835                 }
836 
837                 /*
838                  *      Set up data on packet
839                  */
840 
841                 ip6_copy_metadata(frag, skb);
842                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
843                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
844                 skb_reset_network_header(frag);
845                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
846                 frag->transport_header = (frag->network_header + hlen +
847                                           sizeof(struct frag_hdr));
848 
849                 /*
850                  *      Charge the memory for the fragment to any owner
851                  *      it might possess
852                  */
853                 if (skb->sk)
854                         skb_set_owner_w(frag, skb->sk);
855 
856                 /*
857                  *      Copy the packet header into the new buffer.
858                  */
859                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
860 
861                 /*
862                  *      Build fragment header.
863                  */
864                 fh->nexthdr = nexthdr;
865                 fh->reserved = 0;
866                 if (!frag_id) {
867                         ipv6_select_ident(fh, rt);
868                         frag_id = fh->identification;
869                 } else
870                         fh->identification = frag_id;
871 
872                 /*
873                  *      Copy a block of the IP datagram.
874                  */
875                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
876                         BUG();
877                 left -= len;
878 
879                 fh->frag_off = htons(offset);
880                 if (left > 0)
881                         fh->frag_off |= htons(IP6_MF);
882                 ipv6_hdr(frag)->payload_len = htons(frag->len -
883                                                     sizeof(struct ipv6hdr));
884 
885                 ptr += len;
886                 offset += len;
887 
888                 /*
889                  *      Put this fragment into the sending queue.
890                  */
891                 err = output(frag);
892                 if (err)
893                         goto fail;
894 
895                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
896                               IPSTATS_MIB_FRAGCREATES);
897         }
898         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
899                       IPSTATS_MIB_FRAGOKS);
900         kfree_skb(skb);
901         return err;
902 
903 fail:
904         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
905                       IPSTATS_MIB_FRAGFAILS);
906         kfree_skb(skb);
907         return err;
908 }
909 
910 static inline int ip6_rt_check(struct rt6key *rt_key,
911                                struct in6_addr *fl_addr,
912                                struct in6_addr *addr_cache)
913 {
914         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
915                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
916 }
917 
/* Validate a dst entry cached on the socket against flow @fl.
 *
 * Returns @dst if it may still be used for this flow.  Otherwise the
 * reference is dropped (dst_release) and NULL is returned so the caller
 * falls back to a fresh route lookup.  A NULL @dst passes straight
 * through.
 */
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  struct flowi *fl)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	/* A cached entry of another address family can never be reused. */
	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
	    (fl->oif && fl->oif != dst->dev->ifindex)) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
963 
/* Common tail of the dst lookup helpers.
 *
 * On entry *@dst is either NULL (forces a routing-table lookup) or an
 * already-validated cached entry.  Also fills in a source address for
 * the flow when none was given.  Returns 0 with a held reference in
 * *@dst, or a negative errno with *@dst set to NULL (reference
 * released).
 */
static int ip6_dst_lookup_tail(struct sock *sk,
			       struct dst_entry **dst, struct flowi *fl)
{
	int err;
	struct net *net = sock_net(sk);

	if (*dst == NULL)
		*dst = ip6_route_output(net, sk, fl);

	/* ip6_route_output() never returns NULL; errors are reported
	 * through dst->error instead.
	 */
	if ((err = (*dst)->error))
		goto out_err_release;

	/* No source address supplied: pick one appropriate for the
	 * destination and the socket's source-address preferences.
	 */
	if (ipv6_addr_any(&fl->fl6_src)) {
		err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
					 &fl->fl6_dst,
					 sk ? inet6_sk(sk)->srcprefs : 0,
					 &fl->fl6_src);
		if (err)
			goto out_err_release;
	}

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
		struct inet6_ifaddr *ifp;
		struct flowi fl_gw;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			/* Re-route with an unspecified destination so the
			 * default route (nexthop router) is selected.
			 */
			memcpy(&fl_gw, fl, sizeof(struct flowi));
			memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw);
			if ((err = (*dst)->error))
				goto out_err_release;
		}
	}
#endif

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
1030 
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *	Any dst cached on the socket is deliberately ignored; see
 *	ip6_sk_dst_lookup() for the cache-aware variant.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;	/* force a fresh lookup in ip6_dst_lookup_tail() */
	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1047 
/**
 *	ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@dst: pointer to dst_entry * for result
 *	@fl: flow to lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
	*dst = NULL;
	if (sk) {
		/* Grab the socket's cached dst (validated against the
		 * flow-label cookie), then re-check it against this flow;
		 * an unusable entry is released and left as NULL so the
		 * tail helper performs a full lookup.
		 */
		*dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
		*dst = ip6_sk_dst_check(sk, *dst, fl);
	}

	return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1072 
/* Append data for a UDP datagram that will be segmented by the NIC
 * (UDP fragmentation offload).
 *
 * On the first call (empty write queue) one skb is allocated and its
 * headers, checksum mode and GSO parameters are initialised; subsequent
 * data is attached as page fragments via skb_append_datato_frags().
 * Returns 0 on success or a negative errno.
 */
static inline int ip6_ufo_append_data(struct sock *sk,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int transhdrlen, int mtu,unsigned int flags,
			struct rt6_info *rt)

{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
		struct frag_hdr fhdr;

		/* +20 slack beyond headers; NOTE(review): looks like
		 * historical headroom padding — exact rationale not
		 * visible here.
		 */
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (skb == NULL)
			return -ENOMEM;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb,fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_reset_network_header(skb);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		/* Hardware computes the UDP checksum. */
		skb->ip_summed = CHECKSUM_PARTIAL;
		skb->csum = 0;
		sk->sk_sndmsg_off = 0;

		/* Specify the length of each IPv6 datagram fragment.
		 * It has to be a multiple of 8.
		 */
		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
					     sizeof(struct frag_hdr)) & ~7;
		skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
		/* One fragment ID is shared by all fragments the device
		 * will generate for this datagram.
		 */
		ipv6_select_ident(&fhdr, rt);
		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
		__skb_queue_tail(&sk->sk_write_queue, skb);
	}

	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
1127 
1128 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1129                                                gfp_t gfp)
1130 {
1131         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1132 }
1133 
1134 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1135                                                 gfp_t gfp)
1136 {
1137         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1138 }
1139 
/*
 * ip6_append_data - append user data to the socket's pending (corked)
 * output queue.
 *
 * @getfrag copies @length bytes of payload into the skbs built here.
 * @transhdrlen is the transport header length (non-zero only for the
 * first chunk of a datagram); @hlimit/@tclass/@opt/@fl/@rt seed the
 * cork state on the first call and are ignored afterwards, when the
 * values saved in inet->cork / np->cork are reused instead.
 *
 * Data is laid out in mtu-sized skbs, each with headroom reserved for
 * the link-layer header and a fragment header; the datagram is
 * finalised and transmitted by ip6_push_pending_frames().
 *
 * Returns 0 on success or a negative errno (cork state is adjusted,
 * not torn down, on error — the caller flushes or pushes later).
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
	int offset, int len, int odd, struct sk_buff *skb),
	void *from, int length, int transhdrlen,
	int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
	struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	unsigned int maxfraglen, fragheaderlen;
	int exthdrlen;
	int hh_len;
	int mtu;
	int copy;
	int err;
	int offset = 0;		/* running offset into the user data */
	int csummode = CHECKSUM_NONE;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		if (opt) {
			/* A previous cork must have been released first. */
			if (WARN_ON(np->cork.opt))
				return -EINVAL;

			np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
			if (unlikely(np->cork.opt == NULL))
				return -ENOBUFS;

			np->cork.opt->tot_len = opt->tot_len;
			np->cork.opt->opt_flen = opt->opt_flen;
			np->cork.opt->opt_nflen = opt->opt_nflen;

			/* Deep-copy each extension header so the caller's
			 * option buffers need not outlive the cork.
			 */
			np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
							    sk->sk_allocation);
			if (opt->dst0opt && !np->cork.opt->dst0opt)
				return -ENOBUFS;

			np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
							    sk->sk_allocation);
			if (opt->dst1opt && !np->cork.opt->dst1opt)
				return -ENOBUFS;

			np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
							   sk->sk_allocation);
			if (opt->hopopt && !np->cork.opt->hopopt)
				return -ENOBUFS;

			np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
							    sk->sk_allocation);
			if (opt->srcrt && !np->cork.opt->srcrt)
				return -ENOBUFS;

			/* need source address above miyazawa*/
		}
		dst_hold(&rt->u.dst);
		inet->cork.dst = &rt->u.dst;
		inet->cork.fl = *fl;
		np->cork.hop_limit = hlimit;
		np->cork.tclass = tclass;
		/* PMTUDISC_PROBE uses the device mtu, ignoring any path
		 * mtu learned on the route.
		 */
		mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
		      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
		/* A non-zero per-socket frag_size may shrink (never grow)
		 * the effective mtu.
		 */
		if (np->frag_size < mtu) {
			if (np->frag_size)
				mtu = np->frag_size;
		}
		inet->cork.fragsize = mtu;
		if (dst_allfrag(rt->u.dst.path))
			inet->cork.flags |= IPCORK_ALLFRAG;
		inet->cork.length = 0;
		sk->sk_sndmsg_page = NULL;
		sk->sk_sndmsg_off = 0;
		/* Extra header bytes in front of the payload: tunnel/dst
		 * header space plus fragmentable options, minus the
		 * non-fragmentable header length already accounted in
		 * fragheaderlen below.
		 */
		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
			    rt->rt6i_nfheader_len;
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		/* Subsequent append: everything comes from the cork. */
		rt = (struct rt6_info *)inet->cork.dst;
		fl = &inet->cork.fl;
		opt = np->cork.opt;
		transhdrlen = 0;
		exthdrlen = 0;
		mtu = inet->cork.fragsize;
	}

	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

	/* Per-fragment header: IPv6 header, non-fragmentable extension
	 * headers, plus non-fragmentable options.
	 */
	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	/* Largest skb length such that the payload of a non-final
	 * fragment stays a multiple of 8 and leaves room for the
	 * fragment header.
	 */
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
		/* Total corked datagram may not exceed the maximum IPv6
		 * payload length.
		 */
		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
			return -EMSGSIZE;
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	inet->cork.length += length;
	skb = skb_peek_tail(&sk->sk_write_queue);
	/* Hand oversized UDP datagrams to the UFO path when the device
	 * can segment them in hardware.
	 */
	if (((length > mtu) ||
	     (skb && skb_has_frags(skb))) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->u.dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM)) {
		err = ip6_ufo_append_data(sk, getfrag, from, length,
					  hh_len, fragheaderlen,
					  transhdrlen, mtu, flags, rt);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			struct sk_buff *skb_prev;
alloc_new_skb:
			skb_prev = skb;

			/* There's no room in the current skb */
			if (skb_prev)
				fraggap = skb_prev->len - maxfraglen;
			else
				fraggap = 0;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;
			if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen;

			fraglen = datalen + fragheaderlen;
			if ((flags & MSG_MORE) &&
			    !(rt->u.dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			/*
			 * The last fragment gets additional space at tail.
			 * Note: we overallocate on fragments with MSG_MODE
			 * because we have no idea if we're the last one.
			 */
			if (datalen == length + fraggap)
				alloclen += rt->u.dst.trailer_len;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			if (transhdrlen) {
				/* First skb of the datagram: may block
				 * (subject to MSG_DONTWAIT) on sndbuf.
				 */
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				/* Follow-up fragments: best-effort, capped
				 * at twice sk_sndbuf so corked sends cannot
				 * run away with memory.
				 */
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(skb == NULL))
					err = -ENOBUFS;
			}
			if (skb == NULL)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation */
			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				/* Move the unaligned tail of the previous
				 * skb into this one, keeping its checksum
				 * consistent.
				 */
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				kfree_skb(skb);
				goto error;
			} else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			/* Transport header and ext-header slack are only
			 * accounted in the first skb.
			 */
			transhdrlen = 0;
			exthdrlen = 0;
			csummode = CHECKSUM_NONE;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(&sk->sk_write_queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
			/* No scatter-gather: copy into the linear area. */
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			/* Scatter-gather capable device: append into page
			 * fragments, reusing the socket's current send
			 * page while it has room.
			 */
			int i = skb_shinfo(skb)->nr_frags;
			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
			struct page *page = sk->sk_sndmsg_page;
			int off = sk->sk_sndmsg_off;
			unsigned int left;

			if (page && (left = PAGE_SIZE - off) > 0) {
				if (copy >= left)
					copy = left;
				if (page != frag->page) {
					if (i == MAX_SKB_FRAGS) {
						err = -EMSGSIZE;
						goto error;
					}
					get_page(page);
					skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
					frag = &skb_shinfo(skb)->frags[i];
				}
			} else if(i < MAX_SKB_FRAGS) {
				if (copy > PAGE_SIZE)
					copy = PAGE_SIZE;
				page = alloc_pages(sk->sk_allocation, 0);
				if (page == NULL) {
					err = -ENOMEM;
					goto error;
				}
				sk->sk_sndmsg_page = page;
				sk->sk_sndmsg_off = 0;

				skb_fill_page_desc(skb, i, page, 0, 0);
				frag = &skb_shinfo(skb)->frags[i];
			} else {
				err = -EMSGSIZE;
				goto error;
			}
			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
				err = -EFAULT;
				goto error;
			}
			sk->sk_sndmsg_off += copy;
			frag->size += copy;
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}
	return 0;
error:
	/* Roll the unappended remainder back out of the cork length. */
	inet->cork.length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
1461 
1462 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1463 {
1464         if (np->cork.opt) {
1465                 kfree(np->cork.opt->dst0opt);
1466                 kfree(np->cork.opt->dst1opt);
1467                 kfree(np->cork.opt->hopopt);
1468                 kfree(np->cork.opt->srcrt);
1469                 kfree(np->cork.opt);
1470                 np->cork.opt = NULL;
1471         }
1472 
1473         if (inet->cork.dst) {
1474                 dst_release(inet->cork.dst);
1475                 inet->cork.dst = NULL;
1476                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1477         }
1478         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1479 }
1480 
/* Coalesce all skbs queued by ip6_append_data() into one datagram,
 * prepend extension headers and the IPv6 header, and hand it to
 * ip6_local_out().  Always releases the cork state before returning.
 *
 * Returns 0 on success or a negative errno.
 */
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = np->cork.opt;
	struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
	struct flowi *fl = &inet->cork.fl;
	unsigned char proto = fl->proto;
	int err = 0;

	if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	/* Chain the remaining queued skbs onto the first one's frag_list
	 * and fold their sizes into the head skb's accounting; ownership
	 * moves to the head, so per-skb destructors are cleared.
	 */
	while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	if (np->pmtudisc < IPV6_PMTUDISC_DO)
		skb->local_df = 1;

	/* Option-push helpers may rewrite the destination (routing
	 * header); final_dst tracks the address that ends up in the
	 * IPv6 header.
	 */
	ipv6_addr_copy(final_dst, &fl->fl6_dst);
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/* First 32 bits: version 6, traffic class, flow label. */
	*(__be32*)hdr = fl->fl6_flowlabel |
		     htonl(0x60000000 | ((int)np->cork.tclass << 20));

	hdr->hop_limit = np->cork.hop_limit;
	hdr->nexthdr = proto;
	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
	ipv6_addr_copy(&hdr->daddr, final_dst);

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->u.dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
	}

	err = ip6_local_out(skb);
	if (err) {
		/* Positive return codes are congestion notifications;
		 * translate, and only treat a remaining non-zero value
		 * as a real error.
		 */
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			goto error;
	}

out:
	ip6_cork_release(inet, np);
	return err;
error:
	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	goto out;
}
1564 
1565 void ip6_flush_pending_frames(struct sock *sk)
1566 {
1567         struct sk_buff *skb;
1568 
1569         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1570                 if (skb_dst(skb))
1571                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1572                                       IPSTATS_MIB_OUTDISCARDS);
1573                 kfree_skb(skb);
1574         }
1575 
1576         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1577 }
1578 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp