~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv6/ip6_output.c

Version: ~ [ linux-6.3-rc3 ] ~ [ linux-6.2.7 ] ~ [ linux-6.1.20 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.103 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.175 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.237 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.278 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.310 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.302 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  *      IPv6 output functions
  3  *      Linux INET6 implementation
  4  *
  5  *      Authors:
  6  *      Pedro Roque             <roque@di.fc.ul.pt>
  7  *
  8  *      Based on linux/net/ipv4/ip_output.c
  9  *
 10  *      This program is free software; you can redistribute it and/or
 11  *      modify it under the terms of the GNU General Public License
 12  *      as published by the Free Software Foundation; either version
 13  *      2 of the License, or (at your option) any later version.
 14  *
 15  *      Changes:
 16  *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 17  *                              extension headers are implemented.
 18  *                              route changes now work.
 19  *                              ip6_forward does not confuse sniffers.
 20  *                              etc.
 21  *
 22  *      H. von Brand    :       Added missing #include <linux/string.h>
 23  *      Imran Patel     :       frag id should be in NBO
 24  *      Kazunori MIYAZAWA @USAGI
 25  *                      :       add ip6_append_data and related functions
 26  *                              for datagram xmit
 27  */
 28 
 29 #include <linux/errno.h>
 30 #include <linux/kernel.h>
 31 #include <linux/string.h>
 32 #include <linux/socket.h>
 33 #include <linux/net.h>
 34 #include <linux/netdevice.h>
 35 #include <linux/if_arp.h>
 36 #include <linux/in6.h>
 37 #include <linux/tcp.h>
 38 #include <linux/route.h>
 39 #include <linux/module.h>
 40 #include <linux/slab.h>
 41 
 42 #include <linux/netfilter.h>
 43 #include <linux/netfilter_ipv6.h>
 44 
 45 #include <net/sock.h>
 46 #include <net/snmp.h>
 47 
 48 #include <net/ipv6.h>
 49 #include <net/ndisc.h>
 50 #include <net/protocol.h>
 51 #include <net/ip6_route.h>
 52 #include <net/addrconf.h>
 53 #include <net/rawv6.h>
 54 #include <net/icmp.h>
 55 #include <net/xfrm.h>
 56 #include <net/checksum.h>
 57 #include <linux/mroute6.h>
 58 
 59 int __ip6_local_out(struct sk_buff *skb)
 60 {
 61         int len;
 62 
 63         len = skb->len - sizeof(struct ipv6hdr);
 64         if (len > IPV6_MAXPLEN)
 65                 len = 0;
 66         ipv6_hdr(skb)->payload_len = htons(len);
 67 
 68         return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
 69                        skb_dst(skb)->dev, dst_output);
 70 }
 71 
 72 int ip6_local_out(struct sk_buff *skb)
 73 {
 74         int err;
 75 
 76         err = __ip6_local_out(skb);
 77         if (likely(err == 1))
 78                 err = dst_output(skb);
 79 
 80         return err;
 81 }
 82 EXPORT_SYMBOL_GPL(ip6_local_out);
 83 
 84 static int ip6_finish_output2(struct sk_buff *skb)
 85 {
 86         struct dst_entry *dst = skb_dst(skb);
 87         struct net_device *dev = dst->dev;
 88         struct neighbour *neigh;
 89         struct in6_addr *nexthop;
 90         int ret;
 91 
 92         skb->protocol = htons(ETH_P_IPV6);
 93         skb->dev = dev;
 94 
 95         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 96                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 97 
 98                 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
 99                     ((mroute6_socket(dev_net(dev), skb) &&
100                      !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
101                      ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
102                                          &ipv6_hdr(skb)->saddr))) {
103                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
104 
105                         /* Do not check for IFF_ALLMULTI; multicast routing
106                            is not supported in any case.
107                          */
108                         if (newskb)
109                                 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
110                                         newskb, NULL, newskb->dev,
111                                         dev_loopback_xmit);
112 
113                         if (ipv6_hdr(skb)->hop_limit == 0) {
114                                 IP6_INC_STATS(dev_net(dev), idev,
115                                               IPSTATS_MIB_OUTDISCARDS);
116                                 kfree_skb(skb);
117                                 return 0;
118                         }
119                 }
120 
121                 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
122                                 skb->len);
123 
124                 if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
125                     IPV6_ADDR_SCOPE_NODELOCAL &&
126                     !(dev->flags & IFF_LOOPBACK)) {
127                         kfree_skb(skb);
128                         return 0;
129                 }
130         }
131 
132         rcu_read_lock_bh();
133         nexthop = rt6_nexthop((struct rt6_info *)dst);
134         neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
135         if (unlikely(!neigh))
136                 neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
137         if (!IS_ERR(neigh)) {
138                 ret = dst_neigh_output(dst, neigh, skb);
139                 rcu_read_unlock_bh();
140                 return ret;
141         }
142         rcu_read_unlock_bh();
143 
144         IP6_INC_STATS(dev_net(dst->dev),
145                       ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
146         kfree_skb(skb);
147         return -EINVAL;
148 }
149 
150 static int ip6_finish_output(struct sk_buff *skb)
151 {
152         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
153             dst_allfrag(skb_dst(skb)) ||
154             (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
155                 return ip6_fragment(skb, ip6_finish_output2);
156         else
157                 return ip6_finish_output2(skb);
158 }
159 
160 int ip6_output(struct sk_buff *skb)
161 {
162         struct net_device *dev = skb_dst(skb)->dev;
163         struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
164         if (unlikely(idev->cnf.disable_ipv6)) {
165                 IP6_INC_STATS(dev_net(dev), idev,
166                               IPSTATS_MIB_OUTDISCARDS);
167                 kfree_skb(skb);
168                 return 0;
169         }
170 
171         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
172                             ip6_finish_output,
173                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174 }
175 
176 /*
177  *      xmit an sk_buff (used by TCP, SCTP and DCCP)
178  */
179 
180 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
181              struct ipv6_txoptions *opt, int tclass)
182 {
183         struct net *net = sock_net(sk);
184         struct ipv6_pinfo *np = inet6_sk(sk);
185         struct in6_addr *first_hop = &fl6->daddr;
186         struct dst_entry *dst = skb_dst(skb);
187         struct ipv6hdr *hdr;
188         u8  proto = fl6->flowi6_proto;
189         int seg_len = skb->len;
190         int hlimit = -1;
191         u32 mtu;
192 
193         if (opt) {
194                 unsigned int head_room;
195 
196                 /* First: exthdrs may take lots of space (~8K for now)
197                    MAX_HEADER is not enough.
198                  */
199                 head_room = opt->opt_nflen + opt->opt_flen;
200                 seg_len += head_room;
201                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
202 
203                 if (skb_headroom(skb) < head_room) {
204                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
205                         if (skb2 == NULL) {
206                                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
207                                               IPSTATS_MIB_OUTDISCARDS);
208                                 kfree_skb(skb);
209                                 return -ENOBUFS;
210                         }
211                         consume_skb(skb);
212                         skb = skb2;
213                         skb_set_owner_w(skb, sk);
214                 }
215                 if (opt->opt_flen)
216                         ipv6_push_frag_opts(skb, opt, &proto);
217                 if (opt->opt_nflen)
218                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
219         }
220 
221         skb_push(skb, sizeof(struct ipv6hdr));
222         skb_reset_network_header(skb);
223         hdr = ipv6_hdr(skb);
224 
225         /*
226          *      Fill in the IPv6 header
227          */
228         if (np)
229                 hlimit = np->hop_limit;
230         if (hlimit < 0)
231                 hlimit = ip6_dst_hoplimit(dst);
232 
233         ip6_flow_hdr(hdr, tclass, fl6->flowlabel);
234 
235         hdr->payload_len = htons(seg_len);
236         hdr->nexthdr = proto;
237         hdr->hop_limit = hlimit;
238 
239         hdr->saddr = fl6->saddr;
240         hdr->daddr = *first_hop;
241 
242         skb->priority = sk->sk_priority;
243         skb->mark = sk->sk_mark;
244 
245         mtu = dst_mtu(dst);
246         if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
247                 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248                               IPSTATS_MIB_OUT, skb->len);
249                 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250                                dst->dev, dst_output);
251         }
252 
253         skb->dev = dst->dev;
254         ipv6_local_error(sk, EMSGSIZE, fl6, mtu);
255         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
256         kfree_skb(skb);
257         return -EMSGSIZE;
258 }
259 
260 EXPORT_SYMBOL(ip6_xmit);
261 
262 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
263 {
264         struct ip6_ra_chain *ra;
265         struct sock *last = NULL;
266 
267         read_lock(&ip6_ra_lock);
268         for (ra = ip6_ra_chain; ra; ra = ra->next) {
269                 struct sock *sk = ra->sk;
270                 if (sk && ra->sel == sel &&
271                     (!sk->sk_bound_dev_if ||
272                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
273                         if (last) {
274                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
275                                 if (skb2)
276                                         rawv6_rcv(last, skb2);
277                         }
278                         last = sk;
279                 }
280         }
281 
282         if (last) {
283                 rawv6_rcv(last, skb);
284                 read_unlock(&ip6_ra_lock);
285                 return 1;
286         }
287         read_unlock(&ip6_ra_lock);
288         return 0;
289 }
290 
291 static int ip6_forward_proxy_check(struct sk_buff *skb)
292 {
293         struct ipv6hdr *hdr = ipv6_hdr(skb);
294         u8 nexthdr = hdr->nexthdr;
295         __be16 frag_off;
296         int offset;
297 
298         if (ipv6_ext_hdr(nexthdr)) {
299                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
300                 if (offset < 0)
301                         return 0;
302         } else
303                 offset = sizeof(struct ipv6hdr);
304 
305         if (nexthdr == IPPROTO_ICMPV6) {
306                 struct icmp6hdr *icmp6;
307 
308                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
309                                          offset + 1 - skb->data)))
310                         return 0;
311 
312                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
313 
314                 switch (icmp6->icmp6_type) {
315                 case NDISC_ROUTER_SOLICITATION:
316                 case NDISC_ROUTER_ADVERTISEMENT:
317                 case NDISC_NEIGHBOUR_SOLICITATION:
318                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
319                 case NDISC_REDIRECT:
320                         /* For reaction involving unicast neighbor discovery
321                          * message destined to the proxied address, pass it to
322                          * input function.
323                          */
324                         return 1;
325                 default:
326                         break;
327                 }
328         }
329 
330         /*
331          * The proxying router can't forward traffic sent to a link-local
332          * address, so signal the sender and discard the packet. This
333          * behavior is clarified by the MIPv6 specification.
334          */
335         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
336                 dst_link_failure(skb);
337                 return -1;
338         }
339 
340         return 0;
341 }
342 
/*
 *      Continuation used after the NF_INET_FORWARD hook: pass the packet
 *      to dst_output() and return its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
        int rc = dst_output(skb);

        return rc;
}
347 
348 static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
349 {
350         if (skb->len <= mtu)
351                 return false;
352 
353         /* ipv6 conntrack defrag sets max_frag_size + local_df */
354         if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
355                 return true;
356 
357         if (skb->local_df)
358                 return false;
359 
360         if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
361                 return false;
362 
363         return true;
364 }
365 
366 int ip6_forward(struct sk_buff *skb)
367 {
368         struct dst_entry *dst = skb_dst(skb);
369         struct ipv6hdr *hdr = ipv6_hdr(skb);
370         struct inet6_skb_parm *opt = IP6CB(skb);
371         struct net *net = dev_net(dst->dev);
372         u32 mtu;
373 
374         if (net->ipv6.devconf_all->forwarding == 0)
375                 goto error;
376 
377         if (skb_warn_if_lro(skb))
378                 goto drop;
379 
380         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
381                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
382                 goto drop;
383         }
384 
385         if (skb->pkt_type != PACKET_HOST)
386                 goto drop;
387 
388         skb_forward_csum(skb);
389 
390         /*
391          *      We DO NOT make any processing on
392          *      RA packets, pushing them to user level AS IS
393          *      without ane WARRANTY that application will be able
394          *      to interpret them. The reason is that we
395          *      cannot make anything clever here.
396          *
397          *      We are not end-node, so that if packet contains
398          *      AH/ESP, we cannot make anything.
399          *      Defragmentation also would be mistake, RA packets
400          *      cannot be fragmented, because there is no warranty
401          *      that different fragments will go along one path. --ANK
402          */
403         if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
404                 if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
405                         return 0;
406         }
407 
408         /*
409          *      check and decrement ttl
410          */
411         if (hdr->hop_limit <= 1) {
412                 /* Force OUTPUT device used as source address */
413                 skb->dev = dst->dev;
414                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
415                 IP6_INC_STATS_BH(net,
416                                  ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
417 
418                 kfree_skb(skb);
419                 return -ETIMEDOUT;
420         }
421 
422         /* XXX: idev->cnf.proxy_ndp? */
423         if (net->ipv6.devconf_all->proxy_ndp &&
424             pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
425                 int proxied = ip6_forward_proxy_check(skb);
426                 if (proxied > 0)
427                         return ip6_input(skb);
428                 else if (proxied < 0) {
429                         IP6_INC_STATS(net, ip6_dst_idev(dst),
430                                       IPSTATS_MIB_INDISCARDS);
431                         goto drop;
432                 }
433         }
434 
435         if (!xfrm6_route_forward(skb)) {
436                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
437                 goto drop;
438         }
439         dst = skb_dst(skb);
440 
441         /* IPv6 specs say nothing about it, but it is clear that we cannot
442            send redirects to source routed frames.
443            We don't send redirects to frames decapsulated from IPsec.
444          */
445         if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
446                 struct in6_addr *target = NULL;
447                 struct inet_peer *peer;
448                 struct rt6_info *rt;
449 
450                 /*
451                  *      incoming and outgoing devices are the same
452                  *      send a redirect.
453                  */
454 
455                 rt = (struct rt6_info *) dst;
456                 if (rt->rt6i_flags & RTF_GATEWAY)
457                         target = &rt->rt6i_gateway;
458                 else
459                         target = &hdr->daddr;
460 
461                 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
462 
463                 /* Limit redirects both by destination (here)
464                    and by source (inside ndisc_send_redirect)
465                  */
466                 if (inet_peer_xrlim_allow(peer, 1*HZ))
467                         ndisc_send_redirect(skb, target);
468                 if (peer)
469                         inet_putpeer(peer);
470         } else {
471                 int addrtype = ipv6_addr_type(&hdr->saddr);
472 
473                 /* This check is security critical. */
474                 if (addrtype == IPV6_ADDR_ANY ||
475                     addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
476                         goto error;
477                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
478                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
479                                     ICMPV6_NOT_NEIGHBOUR, 0);
480                         goto error;
481                 }
482         }
483 
484         mtu = dst_mtu(dst);
485         if (mtu < IPV6_MIN_MTU)
486                 mtu = IPV6_MIN_MTU;
487 
488         if (ip6_pkt_too_big(skb, mtu)) {
489                 /* Again, force OUTPUT device used as source address */
490                 skb->dev = dst->dev;
491                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
492                 IP6_INC_STATS_BH(net,
493                                  ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
494                 IP6_INC_STATS_BH(net,
495                                  ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
496                 kfree_skb(skb);
497                 return -EMSGSIZE;
498         }
499 
500         if (skb_cow(skb, dst->dev->hard_header_len)) {
501                 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
502                 goto drop;
503         }
504 
505         hdr = ipv6_hdr(skb);
506 
507         /* Mangling hops number delayed to point after skb COW */
508 
509         hdr->hop_limit--;
510 
511         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
512         IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
513         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
514                        ip6_forward_finish);
515 
516 error:
517         IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
518 drop:
519         kfree_skb(skb);
520         return -EINVAL;
521 }
522 
523 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
524 {
525         to->pkt_type = from->pkt_type;
526         to->priority = from->priority;
527         to->protocol = from->protocol;
528         skb_dst_drop(to);
529         skb_dst_set(to, dst_clone(skb_dst(from)));
530         to->dev = from->dev;
531         to->mark = from->mark;
532 
533 #ifdef CONFIG_NET_SCHED
534         to->tc_index = from->tc_index;
535 #endif
536         nf_copy(to, from);
537 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
538         to->nf_trace = from->nf_trace;
539 #endif
540         skb_copy_secmark(to, from);
541 }
542 
543 static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
544 {
545         static u32 ip6_idents_hashrnd __read_mostly;
546         static bool hashrnd_initialized = false;
547         u32 hash, id;
548 
549         if (unlikely(!hashrnd_initialized)) {
550                 hashrnd_initialized = true;
551                 get_random_bytes(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd));
552         }
553         hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd);
554         hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash);
555 
556         id = ip_idents_reserve(hash, 1);
557         fhdr->identification = htonl(id);
558 }
559 
560 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
561 {
562         struct sk_buff *frag;
563         struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
564         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
565         struct ipv6hdr *tmp_hdr;
566         struct frag_hdr *fh;
567         unsigned int mtu, hlen, left, len;
568         int hroom, troom;
569         __be32 frag_id = 0;
570         int ptr, offset = 0, err=0;
571         u8 *prevhdr, nexthdr = 0;
572         struct net *net = dev_net(skb_dst(skb)->dev);
573 
574         hlen = ip6_find_1stfragopt(skb, &prevhdr);
575         nexthdr = *prevhdr;
576 
577         mtu = ip6_skb_dst_mtu(skb);
578 
579         /* We must not fragment if the socket is set to force MTU discovery
580          * or if the skb it not generated by a local socket.
581          */
582         if (unlikely(!skb->local_df && skb->len > mtu) ||
583                      (IP6CB(skb)->frag_max_size &&
584                       IP6CB(skb)->frag_max_size > mtu)) {
585                 if (skb->sk && dst_allfrag(skb_dst(skb)))
586                         sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
587 
588                 skb->dev = skb_dst(skb)->dev;
589                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
590                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
591                               IPSTATS_MIB_FRAGFAILS);
592                 kfree_skb(skb);
593                 return -EMSGSIZE;
594         }
595 
596         if (np && np->frag_size < mtu) {
597                 if (np->frag_size)
598                         mtu = np->frag_size;
599         }
600         mtu -= hlen + sizeof(struct frag_hdr);
601 
602         if (skb_has_frag_list(skb)) {
603                 int first_len = skb_pagelen(skb);
604                 struct sk_buff *frag2;
605 
606                 if (first_len - hlen > mtu ||
607                     ((first_len - hlen) & 7) ||
608                     skb_cloned(skb))
609                         goto slow_path;
610 
611                 skb_walk_frags(skb, frag) {
612                         /* Correct geometry. */
613                         if (frag->len > mtu ||
614                             ((frag->len & 7) && frag->next) ||
615                             skb_headroom(frag) < hlen)
616                                 goto slow_path_clean;
617 
618                         /* Partially cloned skb? */
619                         if (skb_shared(frag))
620                                 goto slow_path_clean;
621 
622                         BUG_ON(frag->sk);
623                         if (skb->sk) {
624                                 frag->sk = skb->sk;
625                                 frag->destructor = sock_wfree;
626                         }
627                         skb->truesize -= frag->truesize;
628                 }
629 
630                 err = 0;
631                 offset = 0;
632                 frag = skb_shinfo(skb)->frag_list;
633                 skb_frag_list_init(skb);
634                 /* BUILD HEADER */
635 
636                 *prevhdr = NEXTHDR_FRAGMENT;
637                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
638                 if (!tmp_hdr) {
639                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
640                                       IPSTATS_MIB_FRAGFAILS);
641                         return -ENOMEM;
642                 }
643 
644                 __skb_pull(skb, hlen);
645                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
646                 __skb_push(skb, hlen);
647                 skb_reset_network_header(skb);
648                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
649 
650                 ipv6_select_ident(fh, rt);
651                 fh->nexthdr = nexthdr;
652                 fh->reserved = 0;
653                 fh->frag_off = htons(IP6_MF);
654                 frag_id = fh->identification;
655 
656                 first_len = skb_pagelen(skb);
657                 skb->data_len = first_len - skb_headlen(skb);
658                 skb->len = first_len;
659                 ipv6_hdr(skb)->payload_len = htons(first_len -
660                                                    sizeof(struct ipv6hdr));
661 
662                 dst_hold(&rt->dst);
663 
664                 for (;;) {
665                         /* Prepare header of the next frame,
666                          * before previous one went down. */
667                         if (frag) {
668                                 frag->ip_summed = CHECKSUM_NONE;
669                                 skb_reset_transport_header(frag);
670                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
671                                 __skb_push(frag, hlen);
672                                 skb_reset_network_header(frag);
673                                 memcpy(skb_network_header(frag), tmp_hdr,
674                                        hlen);
675                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
676                                 fh->nexthdr = nexthdr;
677                                 fh->reserved = 0;
678                                 fh->frag_off = htons(offset);
679                                 if (frag->next != NULL)
680                                         fh->frag_off |= htons(IP6_MF);
681                                 fh->identification = frag_id;
682                                 ipv6_hdr(frag)->payload_len =
683                                                 htons(frag->len -
684                                                       sizeof(struct ipv6hdr));
685                                 ip6_copy_metadata(frag, skb);
686                         }
687 
688                         err = output(skb);
689                         if(!err)
690                                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
691                                               IPSTATS_MIB_FRAGCREATES);
692 
693                         if (err || !frag)
694                                 break;
695 
696                         skb = frag;
697                         frag = skb->next;
698                         skb->next = NULL;
699                 }
700 
701                 kfree(tmp_hdr);
702 
703                 if (err == 0) {
704                         IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
705                                       IPSTATS_MIB_FRAGOKS);
706                         ip6_rt_put(rt);
707                         return 0;
708                 }
709 
710                 while (frag) {
711                         skb = frag->next;
712                         kfree_skb(frag);
713                         frag = skb;
714                 }
715 
716                 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
717                               IPSTATS_MIB_FRAGFAILS);
718                 ip6_rt_put(rt);
719                 return err;
720 
721 slow_path_clean:
722                 skb_walk_frags(skb, frag2) {
723                         if (frag2 == frag)
724                                 break;
725                         frag2->sk = NULL;
726                         frag2->destructor = NULL;
727                         skb->truesize += frag2->truesize;
728                 }
729         }
730 
731 slow_path:
732         if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
733             skb_checksum_help(skb))
734                 goto fail;
735 
736         left = skb->len - hlen;         /* Space per frame */
737         ptr = hlen;                     /* Where to start from */
738 
739         /*
740          *      Fragment the datagram.
741          */
742 
743         hroom = LL_RESERVED_SPACE(rt->dst.dev);
744         troom = rt->dst.dev->needed_tailroom;
745 
746         /*
747          *      Keep copying data until we run out.
748          */
749         while(left > 0) {
750                 u8 *fragnexthdr_offset;
751 
752                 len = left;
753                 /* IF: it doesn't fit, use 'mtu' - the data space left */
754                 if (len > mtu)
755                         len = mtu;
756                 /* IF: we are not sending up to and including the packet end
757                    then align the next start on an eight byte boundary */
758                 if (len < left) {
759                         len &= ~7;
760                 }
761                 /*
762                  *      Allocate buffer.
763                  */
764 
765                 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
766                                       hroom + troom, GFP_ATOMIC)) == NULL) {
767                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
768                         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
769                                       IPSTATS_MIB_FRAGFAILS);
770                         err = -ENOMEM;
771                         goto fail;
772                 }
773 
774                 /*
775                  *      Set up data on packet
776                  */
777 
778                 ip6_copy_metadata(frag, skb);
779                 skb_reserve(frag, hroom);
780                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
781                 skb_reset_network_header(frag);
782                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
783                 frag->transport_header = (frag->network_header + hlen +
784                                           sizeof(struct frag_hdr));
785 
786                 /*
787                  *      Charge the memory for the fragment to any owner
788                  *      it might possess
789                  */
790                 if (skb->sk)
791                         skb_set_owner_w(frag, skb->sk);
792 
793                 /*
794                  *      Copy the packet header into the new buffer.
795                  */
796                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
797 
798                 fragnexthdr_offset = skb_network_header(frag);
799                 fragnexthdr_offset += prevhdr - skb_network_header(skb);
800                 *fragnexthdr_offset = NEXTHDR_FRAGMENT;
801 
802                 /*
803                  *      Build fragment header.
804                  */
805                 fh->nexthdr = nexthdr;
806                 fh->reserved = 0;
807                 if (!frag_id) {
808                         ipv6_select_ident(fh, rt);
809                         frag_id = fh->identification;
810                 } else
811                         fh->identification = frag_id;
812 
813                 /*
814                  *      Copy a block of the IP datagram.
815                  */
816                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
817                         BUG();
818                 left -= len;
819 
820                 fh->frag_off = htons(offset);
821                 if (left > 0)
822                         fh->frag_off |= htons(IP6_MF);
823                 ipv6_hdr(frag)->payload_len = htons(frag->len -
824                                                     sizeof(struct ipv6hdr));
825 
826                 ptr += len;
827                 offset += len;
828 
829                 /*
830                  *      Put this fragment into the sending queue.
831                  */
832                 err = output(frag);
833                 if (err)
834                         goto fail;
835 
836                 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
837                               IPSTATS_MIB_FRAGCREATES);
838         }
839         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
840                       IPSTATS_MIB_FRAGOKS);
841         consume_skb(skb);
842         return err;
843 
844 fail:
845         IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
846                       IPSTATS_MIB_FRAGFAILS);
847         kfree_skb(skb);
848         return err;
849 }
850 
851 static inline int ip6_rt_check(const struct rt6key *rt_key,
852                                const struct in6_addr *fl_addr,
853                                const struct in6_addr *addr_cache)
854 {
855         return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
856                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
857 }
858 
859 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
860                                           struct dst_entry *dst,
861                                           const struct flowi6 *fl6)
862 {
863         struct ipv6_pinfo *np = inet6_sk(sk);
864         struct rt6_info *rt;
865 
866         if (!dst)
867                 goto out;
868 
869         if (dst->ops->family != AF_INET6) {
870                 dst_release(dst);
871                 return NULL;
872         }
873 
874         rt = (struct rt6_info *)dst;
875         /* Yes, checking route validity in not connected
876          * case is not very simple. Take into account,
877          * that we do not support routing by source, TOS,
878          * and MSG_DONTROUTE            --ANK (980726)
879          *
880          * 1. ip6_rt_check(): If route was host route,
881          *    check that cached destination is current.
882          *    If it is network route, we still may
883          *    check its validity using saved pointer
884          *    to the last used address: daddr_cache.
885          *    We do not want to save whole address now,
886          *    (because main consumer of this service
887          *    is tcp, which has not this problem),
888          *    so that the last trick works only on connected
889          *    sockets.
890          * 2. oif also should be the same.
891          */
892         if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
893 #ifdef CONFIG_IPV6_SUBTREES
894             ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
895 #endif
896             (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
897                 dst_release(dst);
898                 dst = NULL;
899         }
900 
901 out:
902         return dst;
903 }
904 
905 static int ip6_dst_lookup_tail(struct sock *sk,
906                                struct dst_entry **dst, struct flowi6 *fl6)
907 {
908         struct net *net = sock_net(sk);
909 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
910         struct neighbour *n;
911         struct rt6_info *rt;
912 #endif
913         int err;
914 
915         if (*dst == NULL)
916                 *dst = ip6_route_output(net, sk, fl6);
917 
918         if ((err = (*dst)->error))
919                 goto out_err_release;
920 
921         if (ipv6_addr_any(&fl6->saddr)) {
922                 struct rt6_info *rt = (struct rt6_info *) *dst;
923                 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
924                                           sk ? inet6_sk(sk)->srcprefs : 0,
925                                           &fl6->saddr);
926                 if (err)
927                         goto out_err_release;
928         }
929 
930 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
931         /*
932          * Here if the dst entry we've looked up
933          * has a neighbour entry that is in the INCOMPLETE
934          * state and the src address from the flow is
935          * marked as OPTIMISTIC, we release the found
936          * dst entry and replace it instead with the
937          * dst entry of the nexthop router
938          */
939         rt = (struct rt6_info *) *dst;
940         rcu_read_lock_bh();
941         n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
942         err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
943         rcu_read_unlock_bh();
944 
945         if (err) {
946                 struct inet6_ifaddr *ifp;
947                 struct flowi6 fl_gw6;
948                 int redirect;
949 
950                 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
951                                       (*dst)->dev, 1);
952 
953                 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
954                 if (ifp)
955                         in6_ifa_put(ifp);
956 
957                 if (redirect) {
958                         /*
959                          * We need to get the dst entry for the
960                          * default router instead
961                          */
962                         dst_release(*dst);
963                         memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
964                         memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
965                         *dst = ip6_route_output(net, sk, &fl_gw6);
966                         if ((err = (*dst)->error))
967                                 goto out_err_release;
968                 }
969         }
970 #endif
971 
972         return 0;
973 
974 out_err_release:
975         if (err == -ENETUNREACH)
976                 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
977         dst_release(*dst);
978         *dst = NULL;
979         return err;
980 }
981 
982 /**
983  *      ip6_dst_lookup - perform route lookup on flow
984  *      @sk: socket which provides route info
985  *      @dst: pointer to dst_entry * for result
986  *      @fl6: flow to lookup
987  *
988  *      This function performs a route lookup on the given flow.
989  *
990  *      It returns zero on success, or a standard errno code on error.
991  */
992 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
993 {
994         *dst = NULL;
995         return ip6_dst_lookup_tail(sk, dst, fl6);
996 }
997 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
998 
999 /**
1000  *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1001  *      @sk: socket which provides route info
1002  *      @fl6: flow to lookup
1003  *      @final_dst: final destination address for ipsec lookup
1004  *      @can_sleep: we are in a sleepable context
1005  *
1006  *      This function performs a route lookup on the given flow.
1007  *
1008  *      It returns a valid dst pointer on success, or a pointer encoded
1009  *      error code.
1010  */
1011 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1012                                       const struct in6_addr *final_dst,
1013                                       bool can_sleep)
1014 {
1015         struct dst_entry *dst = NULL;
1016         int err;
1017 
1018         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1019         if (err)
1020                 return ERR_PTR(err);
1021         if (final_dst)
1022                 fl6->daddr = *final_dst;
1023         if (can_sleep)
1024                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1025 
1026         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1027 }
1028 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1029 
1030 /**
1031  *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1032  *      @sk: socket which provides the dst cache and route info
1033  *      @fl6: flow to lookup
1034  *      @final_dst: final destination address for ipsec lookup
1035  *      @can_sleep: we are in a sleepable context
1036  *
1037  *      This function performs a route lookup on the given flow with the
1038  *      possibility of using the cached route in the socket if it is valid.
1039  *      It will take the socket dst lock when operating on the dst cache.
1040  *      As a result, this function can only be used in process context.
1041  *
1042  *      It returns a valid dst pointer on success, or a pointer encoded
1043  *      error code.
1044  */
1045 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1046                                          const struct in6_addr *final_dst,
1047                                          bool can_sleep)
1048 {
1049         struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1050         int err;
1051 
1052         dst = ip6_sk_dst_check(sk, dst, fl6);
1053 
1054         err = ip6_dst_lookup_tail(sk, &dst, fl6);
1055         if (err)
1056                 return ERR_PTR(err);
1057         if (final_dst)
1058                 fl6->daddr = *final_dst;
1059         if (can_sleep)
1060                 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1061 
1062         return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1063 }
1064 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1065 
1066 static inline int ip6_ufo_append_data(struct sock *sk,
1067                         int getfrag(void *from, char *to, int offset, int len,
1068                         int odd, struct sk_buff *skb),
1069                         void *from, int length, int hh_len, int fragheaderlen,
1070                         int transhdrlen, int mtu,unsigned int flags,
1071                         struct rt6_info *rt)
1072 
1073 {
1074         struct sk_buff *skb;
1075         int err;
1076 
1077         /* There is support for UDP large send offload by network
1078          * device, so create one single skb packet containing complete
1079          * udp datagram
1080          */
1081         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1082                 struct frag_hdr fhdr;
1083 
1084                 skb = sock_alloc_send_skb(sk,
1085                         hh_len + fragheaderlen + transhdrlen + 20,
1086                         (flags & MSG_DONTWAIT), &err);
1087                 if (skb == NULL)
1088                         return err;
1089 
1090                 /* reserve space for Hardware header */
1091                 skb_reserve(skb, hh_len);
1092 
1093                 /* create space for UDP/IP header */
1094                 skb_put(skb,fragheaderlen + transhdrlen);
1095 
1096                 /* initialize network header pointer */
1097                 skb_reset_network_header(skb);
1098 
1099                 /* initialize protocol header pointer */
1100                 skb->transport_header = skb->network_header + fragheaderlen;
1101 
1102                 skb->ip_summed = CHECKSUM_PARTIAL;
1103                 skb->csum = 0;
1104 
1105                 /* Specify the length of each IPv6 datagram fragment.
1106                  * It has to be a multiple of 8.
1107                  */
1108                 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1109                                              sizeof(struct frag_hdr)) & ~7;
1110                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1111                 ipv6_select_ident(&fhdr, rt);
1112                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1113                 __skb_queue_tail(&sk->sk_write_queue, skb);
1114         }
1115 
1116         return skb_append_datato_frags(sk, skb, getfrag, from,
1117                                        (length - transhdrlen));
1118 }
1119 
1120 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1121                                                gfp_t gfp)
1122 {
1123         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1124 }
1125 
1126 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1127                                                 gfp_t gfp)
1128 {
1129         return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1130 }
1131 
1132 static void ip6_append_data_mtu(unsigned int *mtu,
1133                                 int *maxfraglen,
1134                                 unsigned int fragheaderlen,
1135                                 struct sk_buff *skb,
1136                                 struct rt6_info *rt,
1137                                 unsigned int orig_mtu)
1138 {
1139         if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1140                 if (skb == NULL) {
1141                         /* first fragment, reserve header_len */
1142                         *mtu = orig_mtu - rt->dst.header_len;
1143 
1144                 } else {
1145                         /*
1146                          * this fragment is not first, the headers
1147                          * space is regarded as data space.
1148                          */
1149                         *mtu = orig_mtu;
1150                 }
1151                 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1152                               + fragheaderlen - sizeof(struct frag_hdr);
1153         }
1154 }
1155 
/*
 * ip6_append_data - queue more payload on the socket's pending IPv6 datagram
 *
 * @sk:          socket whose sk_write_queue collects the pending skbs
 * @getfrag:     callback that copies @len payload bytes found at @offset
 *               of @from into @to; a negative return aborts the append
 *               with -EFAULT
 * @from:        opaque cookie handed to @getfrag
 * @length:      number of payload bytes to append
 * @transhdrlen: transport header length; only used when the write queue
 *               is empty, forced to 0 otherwise
 * @hlimit:      stored in np->cork.hop_limit when the cork is set up
 * @tclass:      stored in np->cork.tclass when the cork is set up
 * @opt:         tx options; duplicated into np->cork.opt on cork setup
 * @fl6:         flow; copied into inet->cork.fl on cork setup
 * @rt:          route; a reference is taken and kept in cork->dst on setup
 * @flags:       MSG_* flags (MSG_PROBE, MSG_MORE and MSG_DONTWAIT are
 *               looked at here)
 * @dontfrag:    if non-zero, UDP and RAW sockets get -EMSGSIZE instead of
 *               fragmentation when @length exceeds the mtu
 *
 * When the write queue is not empty, @rt, @fl6 and @opt are ignored and
 * the values saved at cork setup are used instead.
 *
 * Returns 0 on success or a negative errno.  On the "error" exit path the
 * bytes that were not queued are subtracted from cork->length again and
 * IPSTATS_MIB_OUTDISCARDS is incremented.
 */
1156 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1157         int offset, int len, int odd, struct sk_buff *skb),
1158         void *from, int length, int transhdrlen,
1159         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1160         struct rt6_info *rt, unsigned int flags, int dontfrag)
1161 {
1162         struct inet_sock *inet = inet_sk(sk);
1163         struct ipv6_pinfo *np = inet6_sk(sk);
1164         struct inet_cork *cork;
1165         struct sk_buff *skb, *skb_prev = NULL;
1166         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
1167         int exthdrlen;
1168         int dst_exthdrlen;
1169         int hh_len;
1170         int copy;
1171         int err;
1172         int offset = 0;
1173         __u8 tx_flags = 0;
1174 
             /* MSG_PROBE: nothing is queued. */
1175         if (flags&MSG_PROBE)
1176                 return 0;
1177         cork = &inet->cork.base;
1178         if (skb_queue_empty(&sk->sk_write_queue)) {
1179                 /*
1180                  * setup for corking
1181                  */
1182                 if (opt) {
1183                         if (WARN_ON(np->cork.opt))
1184                                 return -EINVAL;
1185 
                             /*
                              * Deep-copy the options: the container first,
                              * then each header it points to.
                              * NOTE(review): the -ENOBUFS returns below leave
                              * np->cork.opt partially populated; presumably
                              * the caller releases the cork on error - confirm.
                              */
1186                         np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation);
1187                         if (unlikely(np->cork.opt == NULL))
1188                                 return -ENOBUFS;
1189 
1190                         np->cork.opt->tot_len = opt->tot_len;
1191                         np->cork.opt->opt_flen = opt->opt_flen;
1192                         np->cork.opt->opt_nflen = opt->opt_nflen;
1193 
1194                         np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1195                                                             sk->sk_allocation);
1196                         if (opt->dst0opt && !np->cork.opt->dst0opt)
1197                                 return -ENOBUFS;
1198 
1199                         np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1200                                                             sk->sk_allocation);
1201                         if (opt->dst1opt && !np->cork.opt->dst1opt)
1202                                 return -ENOBUFS;
1203 
1204                         np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1205                                                            sk->sk_allocation);
1206                         if (opt->hopopt && !np->cork.opt->hopopt)
1207                                 return -ENOBUFS;
1208 
1209                         np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1210                                                             sk->sk_allocation);
1211                         if (opt->srcrt && !np->cork.opt->srcrt)
1212                                 return -ENOBUFS;
1213 
1214                         /* need source address above miyazawa*/
1215                 }
                     /* The cork keeps its own reference on the route. */
1216                 dst_hold(&rt->dst);
1217                 cork->dst = &rt->dst;
1218                 inet->cork.fl.u.ip6 = *fl6;
1219                 np->cork.hop_limit = hlimit;
1220                 np->cork.tclass = tclass;
                     /*
                      * IPV6_PMTUDISC_PROBE uses the device mtu; otherwise the
                      * dst mtu is used, taken from the route itself for xfrm
                      * tunnel routes and from rt->dst.path for all others.
                      */
1221                 if (rt->dst.flags & DST_XFRM_TUNNEL)
1222                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1223                               rt->dst.dev->mtu : dst_mtu(&rt->dst);
1224                 else
1225                         mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1226                               rt->dst.dev->mtu : dst_mtu(rt->dst.path);
                     /* A non-zero np->frag_size below the mtu caps it. */
1227                 if (np->frag_size < mtu) {
1228                         if (np->frag_size)
1229                                 mtu = np->frag_size;
1230                 }
1231                 cork->fragsize = mtu;
1232                 if (dst_allfrag(rt->dst.path))
1233                         cork->flags |= IPCORK_ALLFRAG;
1234                 cork->length = 0;
                     /*
                      * opt_flen bytes of extension headers are accounted as
                      * part of both the payload and the transport header for
                      * this first skb.
                      */
1235                 exthdrlen = (opt ? opt->opt_flen : 0);
1236                 length += exthdrlen;
1237                 transhdrlen += exthdrlen;
1238                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1239         } else {
                     /*
                      * Cork already set up: reuse the route, flow, options
                      * and mtu saved back then; the caller's rt, fl6, opt
                      * and transhdrlen are ignored.
                      */
1240                 rt = (struct rt6_info *)cork->dst;
1241                 fl6 = &inet->cork.fl.u.ip6;
1242                 opt = np->cork.opt;
1243                 transhdrlen = 0;
1244                 exthdrlen = 0;
1245                 dst_exthdrlen = 0;
1246                 mtu = cork->fragsize;
1247         }
1248         orig_mtu = mtu;
1249 
1250         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1251 
             /*
              * fragheaderlen: IPv6 header plus rt6i_nfheader_len plus the
              * opt_nflen part of the options.
              * maxfraglen: the space after those headers rounded down to a
              * multiple of 8, plus the headers, minus one fragment header.
              */
1252         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1253                         (opt ? opt->opt_nflen : 0);
1254         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1255 
             /* Refuse a corked datagram that would exceed IPV6_MAXPLEN. */
1256         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1257                 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1258                         ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1259                         return -EMSGSIZE;
1260                 }
1261         }
1262 
1263         /* For UDP, check if TX timestamp is enabled */
1264         if (sk->sk_type == SOCK_DGRAM)
1265                 sock_tx_timestamp(sk, &tx_flags);
1266 
1267         /*
1268          * Let's try using as much space as possible.
1269          * Use MTU if total length of the message fits into the MTU.
1270          * Otherwise, we need to reserve fragment header and
1271          * fragment alignment (= 8-15 octets, in total).
1272          *
1273          * Note that we may need to "move" the data from the tail
1274          * of the buffer to the new fragment when we split
1275          * the message.
1276          *
1277          * FIXME: It may be fragmented into multiple chunks
1278          *        at once if non-fragmentable extension headers
1279          *        are too large.
1280          * --yoshfuji
1281          */
1282 
             /* dontfrag: report the mtu to the socket instead of fragmenting. */
1283         if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
1284                                            sk->sk_protocol == IPPROTO_RAW)) {
1285                 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1286                 return -EMSGSIZE;
1287         }
1288 
1289         skb = skb_peek_tail(&sk->sk_write_queue);
1290         cork->length += length;
             /*
              * Hand off to ip6_ufo_append_data() when skb_has_frags() is true
              * for the tail skb, or when a UDP datagram socket would exceed
              * the mtu on a NETIF_F_UFO device with at most one skb queued.
              */
1291         if ((skb && skb_has_frags(skb)) ||
1292             (((length + fragheaderlen) > mtu) &&
1293             (skb_queue_len(&sk->sk_write_queue) <= 1) &&
1294             (sk->sk_protocol == IPPROTO_UDP) &&
1295             (rt->dst.dev->features & NETIF_F_UFO) &&
1296             (sk->sk_type == SOCK_DGRAM))) {
1297                 err = ip6_ufo_append_data(sk, getfrag, from, length,
1298                                           hh_len, fragheaderlen,
1299                                           transhdrlen, mtu, flags, rt);
1300                 if (err)
1301                         goto error;
1302                 return 0;
1303         }
1304 
             /* Empty queue: jump straight into the allocation branch below. */
1305         if (!skb)
1306                 goto alloc_new_skb;
1307 
1308         while (length > 0) {
1309                 /* Check if the remaining data fits into current packet. */
                     /*
                      * Room left in the tail skb: up to mtu if the whole corked
                      * datagram fits and IPCORK_ALLFRAG is clear, otherwise up
                      * to maxfraglen.  If the remaining data does not fit in
                      * that room, the skb is limited to maxfraglen.
                      */
1310                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1311                 if (copy < length)
1312                         copy = maxfraglen - skb->len;
1313 
1314                 if (copy <= 0) {
1315                         char *data;
1316                         unsigned int datalen;
1317                         unsigned int fraglen;
1318                         unsigned int fraggap;
1319                         unsigned int alloclen;
1320 alloc_new_skb:
1321                         /* There's no room in the current skb */
                             /*
                              * fraggap: bytes by which the current skb exceeds
                              * maxfraglen; they are moved into the new skb below.
                              */
1322                         if (skb)
1323                                 fraggap = skb->len - maxfraglen;
1324                         else
1325                                 fraggap = 0;
1326                         /* update mtu and maxfraglen if necessary */
1327                         if (skb == NULL || skb_prev == NULL)
1328                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1329                                                     fragheaderlen, skb, rt,
1330                                                     orig_mtu);
1331 
1332                         skb_prev = skb;
1333 
1334                         /*
1335                          * If remaining data exceeds the mtu,
1336                          * we know we need more fragment(s).
1337                          */
1338                         datalen = length + fraggap;
1339 
1340                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1341                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
                             /*
                              * With MSG_MORE on a device lacking NETIF_F_SG a
                              * full mtu is allocated; otherwise just what this
                              * fragment needs.
                              */
1342                         if ((flags & MSG_MORE) &&
1343                             !(rt->dst.dev->features&NETIF_F_SG))
1344                                 alloclen = mtu;
1345                         else
1346                                 alloclen = datalen + fragheaderlen;
1347 
1348                         alloclen += dst_exthdrlen;
1349 
1350                         if (datalen != length + fraggap) {
1351                                 /*
1352                                  * this is not the last fragment, the trailer
1353                                  * space is regarded as data space.
1354                                  */
1355                                 datalen += rt->dst.trailer_len;
1356                         }
1357 
1358                         alloclen += rt->dst.trailer_len;
1359                         fraglen = datalen + fragheaderlen;
1360 
1361                         /*
1362                          * We just reserve space for fragment header.
1363                          * Note: this may be overallocation if the message
1364                          * (without MSG_MORE) fits into the MTU.
1365                          */
1366                         alloclen += sizeof(struct frag_hdr);
1367 
                             /*
                              * The skb carrying the transport header uses
                              * sock_alloc_send_skb(); later skbs are only
                              * allocated while sk_wmem_alloc does not exceed
                              * twice sk_sndbuf, else -ENOBUFS.
                              */
1368                         if (transhdrlen) {
1369                                 skb = sock_alloc_send_skb(sk,
1370                                                 alloclen + hh_len,
1371                                                 (flags & MSG_DONTWAIT), &err);
1372                         } else {
1373                                 skb = NULL;
1374                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1375                                     2 * sk->sk_sndbuf)
1376                                         skb = sock_wmalloc(sk,
1377                                                            alloclen + hh_len, 1,
1378                                                            sk->sk_allocation);
1379                                 if (unlikely(skb == NULL))
1380                                         err = -ENOBUFS;
1381                                 else {
1382                                         /* Only the initial fragment
1383                                          * is time stamped.
1384                                          */
1385                                         tx_flags = 0;
1386                                 }
1387                         }
1388                         if (skb == NULL)
1389                                 goto error;
1390                         /*
1391                          *      Fill in the control structures
1392                          */
1393                         skb->ip_summed = CHECKSUM_NONE;
1394                         skb->csum = 0;
1395                         /* reserve for fragmentation and ipsec header */
1396                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1397                                     dst_exthdrlen);
1398 
1399                         if (sk->sk_type == SOCK_DGRAM)
1400                                 skb_shinfo(skb)->tx_flags = tx_flags;
1401 
1402                         /*
1403                          *      Find where to start putting bytes
1404                          */
1405                         data = skb_put(skb, fraglen);
1406                         skb_set_network_header(skb, exthdrlen);
1407                         data += fragheaderlen;
1408                         skb->transport_header = (skb->network_header +
1409                                                  fragheaderlen);
                             /*
                              * Move the fraggap excess bytes from the previous
                              * skb into this one, take their checksum out of
                              * the previous skb's csum, and trim the previous
                              * skb back to maxfraglen.
                              */
1410                         if (fraggap) {
1411                                 skb->csum = skb_copy_and_csum_bits(
1412                                         skb_prev, maxfraglen,
1413                                         data + transhdrlen, fraggap, 0);
1414                                 skb_prev->csum = csum_sub(skb_prev->csum,
1415                                                           skb->csum);
1416                                 data += fraggap;
1417                                 pskb_trim_unique(skb_prev, maxfraglen);
1418                         }
                             /* New payload bytes to fetch through getfrag(). */
1419                         copy = datalen - transhdrlen - fraggap;
1420 
1421                         if (copy < 0) {
1422                                 err = -EINVAL;
1423                                 kfree_skb(skb);
1424                                 goto error;
1425                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1426                                 err = -EFAULT;
1427                                 kfree_skb(skb);
1428                                 goto error;
1429                         }
1430 
1431                         offset += copy;
1432                         length -= datalen - fraggap;
                             /* Header lengths only apply to the first skb. */
1433                         transhdrlen = 0;
1434                         exthdrlen = 0;
1435                         dst_exthdrlen = 0;
1436 
1437                         /*
1438                          * Put the packet on the pending queue
1439                          */
1440                         __skb_queue_tail(&sk->sk_write_queue, skb);
1441                         continue;
1442                 }
1443 
1444                 if (copy > length)
1445                         copy = length;
1446 
1447                 if (!(rt->dst.dev->features&NETIF_F_SG)) {
                             /*
                              * No scatter-gather: extend the linear area of
                              * the tail skb and undo the extension if the
                              * copy fails.
                              */
1448                         unsigned int off;
1449 
1450                         off = skb->len;
1451                         if (getfrag(from, skb_put(skb, copy),
1452                                                 offset, copy, off, skb) < 0) {
1453                                 __skb_trim(skb, off);
1454                                 err = -EFAULT;
1455                                 goto error;
1456                         }
1457                 } else {
                             /*
                              * Scatter-gather: copy into the page fragment
                              * returned by sk_page_frag() and attach it to
                              * the skb, opening a new frag slot unless the
                              * data can coalesce with the last one.
                              */
1458                         int i = skb_shinfo(skb)->nr_frags;
1459                         struct page_frag *pfrag = sk_page_frag(sk);
1460 
1461                         err = -ENOMEM;
1462                         if (!sk_page_frag_refill(sk, pfrag))
1463                                 goto error;
1464 
1465                         if (!skb_can_coalesce(skb, i, pfrag->page,
1466                                               pfrag->offset)) {
1467                                 err = -EMSGSIZE;
1468                                 if (i == MAX_SKB_FRAGS)
1469                                         goto error;
1470 
1471                                 __skb_fill_page_desc(skb, i, pfrag->page,
1472                                                      pfrag->offset, 0);
1473                                 skb_shinfo(skb)->nr_frags = ++i;
1474                                 get_page(pfrag->page);
1475                         }
                             /* Never copy past the end of the page fragment. */
1476                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1477                         if (getfrag(from,
1478                                     page_address(pfrag->page) + pfrag->offset,
1479                                     offset, copy, skb->len, skb) < 0)
1480                                 goto error_efault;
1481 
                             /* Account the new bytes in the frag, skb and socket. */
1482                         pfrag->offset += copy;
1483                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1484                         skb->len += copy;
1485                         skb->data_len += copy;
1486                         skb->truesize += copy;
1487                         atomic_add(copy, &sk->sk_wmem_alloc);
1488                 }
1489                 offset += copy;
1490                 length -= copy;
1491         }
1492 
1493         return 0;
1494 
1495 error_efault:
1496         err = -EFAULT;
1497 error:
             /* Give back the part of @length that never reached the queue. */
1498         cork->length -= length;
1499         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1500         return err;
1501 }
1502 EXPORT_SYMBOL_GPL(ip6_append_data);
1503 
1504 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1505 {
1506         if (np->cork.opt) {
1507                 kfree(np->cork.opt->dst0opt);
1508                 kfree(np->cork.opt->dst1opt);
1509                 kfree(np->cork.opt->hopopt);
1510                 kfree(np->cork.opt->srcrt);
1511                 kfree(np->cork.opt);
1512                 np->cork.opt = NULL;
1513         }
1514 
1515         if (inet->cork.base.dst) {
1516                 dst_release(inet->cork.base.dst);
1517                 inet->cork.base.dst = NULL;
1518                 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1519         }
1520         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1521 }
1522 
1523 int ip6_push_pending_frames(struct sock *sk)
1524 {
1525         struct sk_buff *skb, *tmp_skb;
1526         struct sk_buff **tail_skb;
1527         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1528         struct inet_sock *inet = inet_sk(sk);
1529         struct ipv6_pinfo *np = inet6_sk(sk);
1530         struct net *net = sock_net(sk);
1531         struct ipv6hdr *hdr;
1532         struct ipv6_txoptions *opt = np->cork.opt;
1533         struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1534         struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1535         unsigned char proto = fl6->flowi6_proto;
1536         int err = 0;
1537 
1538         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1539                 goto out;
1540         tail_skb = &(skb_shinfo(skb)->frag_list);
1541 
1542         /* move skb->data to ip header from ext header */
1543         if (skb->data < skb_network_header(skb))
1544                 __skb_pull(skb, skb_network_offset(skb));
1545         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1546                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1547                 *tail_skb = tmp_skb;
1548                 tail_skb = &(tmp_skb->next);
1549                 skb->len += tmp_skb->len;
1550                 skb->data_len += tmp_skb->len;
1551                 skb->truesize += tmp_skb->truesize;
1552                 tmp_skb->destructor = NULL;
1553                 tmp_skb->sk = NULL;
1554         }
1555 
1556         /* Allow local fragmentation. */
1557         if (np->pmtudisc < IPV6_PMTUDISC_DO)
1558                 skb->local_df = 1;
1559 
1560         *final_dst = fl6->daddr;
1561         __skb_pull(skb, skb_network_header_len(skb));
1562         if (opt && opt->opt_flen)
1563                 ipv6_push_frag_opts(skb, opt, &proto);
1564         if (opt && opt->opt_nflen)
1565                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1566 
1567         skb_push(skb, sizeof(struct ipv6hdr));
1568         skb_reset_network_header(skb);
1569         hdr = ipv6_hdr(skb);
1570 
1571         ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel);
1572         hdr->hop_limit = np->cork.hop_limit;
1573         hdr->nexthdr = proto;
1574         hdr->saddr = fl6->saddr;
1575         hdr->daddr = *final_dst;
1576 
1577         skb->priority = sk->sk_priority;
1578         skb->mark = sk->sk_mark;
1579 
1580         skb_dst_set(skb, dst_clone(&rt->dst));
1581         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1582         if (proto == IPPROTO_ICMPV6) {
1583                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1584 
1585                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1586                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1587         }
1588 
1589         err = ip6_local_out(skb);
1590         if (err) {
1591                 if (err > 0)
1592                         err = net_xmit_errno(err);
1593                 if (err)
1594                         goto error;
1595         }
1596 
1597 out:
1598         ip6_cork_release(inet, np);
1599         return err;
1600 error:
1601         IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1602         goto out;
1603 }
1604 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1605 
1606 void ip6_flush_pending_frames(struct sock *sk)
1607 {
1608         struct sk_buff *skb;
1609 
1610         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1611                 if (skb_dst(skb))
1612                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1613                                       IPSTATS_MIB_OUTDISCARDS);
1614                 kfree_skb(skb);
1615         }
1616 
1617         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1618 }
1619 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1620 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp