~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/ipv4/tcp_ipv4.c

Version: ~ [ linux-5.4-rc7 ] ~ [ linux-5.3.10 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.83 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.153 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.200 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.200 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.76 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
  3  *              operating system.  INET is implemented using the  BSD Socket
  4  *              interface as the means of communication with the user level.
  5  *
  6  *              Implementation of the Transmission Control Protocol(TCP).
  7  *
  8  *              IPv4 specific functions
  9  *
 10  *
 11  *              code split from:
 12  *              linux/ipv4/tcp.c
 13  *              linux/ipv4/tcp_input.c
 14  *              linux/ipv4/tcp_output.c
 15  *
 16  *              See tcp.c for author information
 17  *
 18  *      This program is free software; you can redistribute it and/or
 19  *      modify it under the terms of the GNU General Public License
 20  *      as published by the Free Software Foundation; either version
 21  *      2 of the License, or (at your option) any later version.
 22  */
 23 
 24 /*
 25  * Changes:
 26  *              David S. Miller :       New socket lookup architecture.
 27  *                                      This code is dedicated to John Dyson.
 28  *              David S. Miller :       Change semantics of established hash,
 29  *                                      half is devoted to TIME_WAIT sockets
 30  *                                      and the rest go in the other half.
 31  *              Andi Kleen :            Add support for syncookies and fixed
 32  *                                      some bugs: ip options weren't passed to
 33  *                                      the TCP layer, missed a check for an
 34  *                                      ACK bit.
 35  *              Andi Kleen :            Implemented fast path mtu discovery.
 36  *                                      Fixed many serious bugs in the
 37  *                                      request_sock handling and moved
 38  *                                      most of it into the af independent code.
 39  *                                      Added tail drop and some other bugfixes.
 40  *                                      Added new listen semantics.
 41  *              Mike McLagan    :       Routing by source
 42  *      Juan Jose Ciarlante:            ip_dynaddr bits
 43  *              Andi Kleen:             various fixes.
 44  *      Vitaly E. Lavrov        :       Transparent proxy revived after year
 45  *                                      coma.
 46  *      Andi Kleen              :       Fix new listen.
 47  *      Andi Kleen              :       Fix accept error reporting.
 48  *      YOSHIFUJI Hideaki @USAGI and:   Support IPV6_V6ONLY socket option, which
 49  *      Alexey Kuznetsov                allow both IPv4 and IPv6 sockets to bind
 50  *                                      a single port at the same time.
 51  */
 52 
 53 #define pr_fmt(fmt) "TCP: " fmt
 54 
 55 #include <linux/bottom_half.h>
 56 #include <linux/types.h>
 57 #include <linux/fcntl.h>
 58 #include <linux/module.h>
 59 #include <linux/random.h>
 60 #include <linux/cache.h>
 61 #include <linux/jhash.h>
 62 #include <linux/init.h>
 63 #include <linux/times.h>
 64 #include <linux/slab.h>
 65 
 66 #include <net/net_namespace.h>
 67 #include <net/icmp.h>
 68 #include <net/inet_hashtables.h>
 69 #include <net/tcp.h>
 70 #include <net/transp_v6.h>
 71 #include <net/ipv6.h>
 72 #include <net/inet_common.h>
 73 #include <net/timewait_sock.h>
 74 #include <net/xfrm.h>
 75 #include <net/secure_seq.h>
 76 #include <net/busy_poll.h>
 77 
 78 #include <linux/inet.h>
 79 #include <linux/ipv6.h>
 80 #include <linux/stddef.h>
 81 #include <linux/proc_fs.h>
 82 #include <linux/seq_file.h>
 83 
 84 #include <crypto/hash.h>
 85 #include <linux/scatterlist.h>
 86 
 87 int sysctl_tcp_tw_reuse __read_mostly;
 88 int sysctl_tcp_low_latency __read_mostly;
 89 EXPORT_SYMBOL(sysctl_tcp_low_latency);
 90 
 91 #ifdef CONFIG_TCP_MD5SIG
 92 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
 93                                __be32 daddr, __be32 saddr, const struct tcphdr *th);
 94 #endif
 95 
 96 struct inet_hashinfo tcp_hashinfo;
 97 EXPORT_SYMBOL(tcp_hashinfo);
 98 
 99 static  __u32 tcp_v4_init_sequence(const struct sk_buff *skb)
100 {
101         return secure_tcp_sequence_number(ip_hdr(skb)->daddr,
102                                           ip_hdr(skb)->saddr,
103                                           tcp_hdr(skb)->dest,
104                                           tcp_hdr(skb)->source);
105 }
106 
107 int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
108 {
109         const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
110         struct tcp_sock *tp = tcp_sk(sk);
111 
112         /* With PAWS, it is safe from the viewpoint
113            of data integrity. Even without PAWS it is safe provided sequence
114            spaces do not overlap i.e. at data rates <= 80Mbit/sec.
115 
116            Actually, the idea is close to VJ's one, only timestamp cache is
117            held not per host, but per port pair and TW bucket is used as state
118            holder.
119 
120            If TW bucket has been already destroyed we fall back to VJ's scheme
121            and use initial timestamp retrieved from peer table.
122          */
123         if (tcptw->tw_ts_recent_stamp &&
124             (!twp || (sysctl_tcp_tw_reuse &&
125                              get_seconds() - tcptw->tw_ts_recent_stamp > 1))) {
126                 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
127                 if (tp->write_seq == 0)
128                         tp->write_seq = 1;
129                 tp->rx_opt.ts_recent       = tcptw->tw_ts_recent;
130                 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
131                 sock_hold(sktw);
132                 return 1;
133         }
134 
135         return 0;
136 }
137 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
138 
139 /* This will initiate an outgoing connection. */
140 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
141 {
142         struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
143         struct inet_sock *inet = inet_sk(sk);
144         struct tcp_sock *tp = tcp_sk(sk);
145         __be16 orig_sport, orig_dport;
146         __be32 daddr, nexthop;
147         struct flowi4 *fl4;
148         struct rtable *rt;
149         int err;
150         struct ip_options_rcu *inet_opt;
151 
152         if (addr_len < sizeof(struct sockaddr_in))
153                 return -EINVAL;
154 
155         if (usin->sin_family != AF_INET)
156                 return -EAFNOSUPPORT;
157 
158         nexthop = daddr = usin->sin_addr.s_addr;
159         inet_opt = rcu_dereference_protected(inet->inet_opt,
160                                              lockdep_sock_is_held(sk));
161         if (inet_opt && inet_opt->opt.srr) {
162                 if (!daddr)
163                         return -EINVAL;
164                 nexthop = inet_opt->opt.faddr;
165         }
166 
167         orig_sport = inet->inet_sport;
168         orig_dport = usin->sin_port;
169         fl4 = &inet->cork.fl.u.ip4;
170         rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
171                               RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
172                               IPPROTO_TCP,
173                               orig_sport, orig_dport, sk);
174         if (IS_ERR(rt)) {
175                 err = PTR_ERR(rt);
176                 if (err == -ENETUNREACH)
177                         IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
178                 return err;
179         }
180 
181         if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
182                 ip_rt_put(rt);
183                 return -ENETUNREACH;
184         }
185 
186         if (!inet_opt || !inet_opt->opt.srr)
187                 daddr = fl4->daddr;
188 
189         if (!inet->inet_saddr)
190                 inet->inet_saddr = fl4->saddr;
191         sk_rcv_saddr_set(sk, inet->inet_saddr);
192 
193         if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
194                 /* Reset inherited state */
195                 tp->rx_opt.ts_recent       = 0;
196                 tp->rx_opt.ts_recent_stamp = 0;
197                 if (likely(!tp->repair))
198                         tp->write_seq      = 0;
199         }
200 
201         if (tcp_death_row.sysctl_tw_recycle &&
202             !tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
203                 tcp_fetch_timewait_stamp(sk, &rt->dst);
204 
205         inet->inet_dport = usin->sin_port;
206         sk_daddr_set(sk, daddr);
207 
208         inet_csk(sk)->icsk_ext_hdr_len = 0;
209         if (inet_opt)
210                 inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
211 
212         tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
213 
214         /* Socket identity is still unknown (sport may be zero).
215          * However we set state to SYN-SENT and not releasing socket
216          * lock select source port, enter ourselves into the hash tables and
217          * complete initialization after this.
218          */
219         tcp_set_state(sk, TCP_SYN_SENT);
220         err = inet_hash_connect(&tcp_death_row, sk);
221         if (err)
222                 goto failure;
223 
224         sk_set_txhash(sk);
225 
226         rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
227                                inet->inet_sport, inet->inet_dport, sk);
228         if (IS_ERR(rt)) {
229                 err = PTR_ERR(rt);
230                 rt = NULL;
231                 goto failure;
232         }
233         /* OK, now commit destination to socket.  */
234         sk->sk_gso_type = SKB_GSO_TCPV4;
235         sk_setup_caps(sk, &rt->dst);
236 
237         if (!tp->write_seq && likely(!tp->repair))
238                 tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
239                                                            inet->inet_daddr,
240                                                            inet->inet_sport,
241                                                            usin->sin_port);
242 
243         inet->inet_id = tp->write_seq ^ jiffies;
244 
245         err = tcp_connect(sk);
246 
247         rt = NULL;
248         if (err)
249                 goto failure;
250 
251         return 0;
252 
253 failure:
254         /*
255          * This unhashes the socket and releases the local port,
256          * if necessary.
257          */
258         tcp_set_state(sk, TCP_CLOSE);
259         ip_rt_put(rt);
260         sk->sk_route_caps = 0;
261         inet->inet_dport = 0;
262         return err;
263 }
264 EXPORT_SYMBOL(tcp_v4_connect);
265 
266 /*
267  * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191.
268  * It can be called through tcp_release_cb() if socket was owned by user
269  * at the time tcp_v4_err() was called to handle ICMP message.
270  */
271 void tcp_v4_mtu_reduced(struct sock *sk)
272 {
273         struct dst_entry *dst;
274         struct inet_sock *inet = inet_sk(sk);
275         u32 mtu = tcp_sk(sk)->mtu_info;
276 
277         dst = inet_csk_update_pmtu(sk, mtu);
278         if (!dst)
279                 return;
280 
281         /* Something is about to be wrong... Remember soft error
282          * for the case, if this connection will not able to recover.
283          */
284         if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
285                 sk->sk_err_soft = EMSGSIZE;
286 
287         mtu = dst_mtu(dst);
288 
289         if (inet->pmtudisc != IP_PMTUDISC_DONT &&
290             ip_sk_accept_pmtu(sk) &&
291             inet_csk(sk)->icsk_pmtu_cookie > mtu) {
292                 tcp_sync_mss(sk, mtu);
293 
294                 /* Resend the TCP packet because it's
295                  * clear that the old packet has been
296                  * dropped. This is the new "fast" path mtu
297                  * discovery.
298                  */
299                 tcp_simple_retransmit(sk);
300         } /* else let the usual retransmit timer handle it */
301 }
302 EXPORT_SYMBOL(tcp_v4_mtu_reduced);
303 
304 static void do_redirect(struct sk_buff *skb, struct sock *sk)
305 {
306         struct dst_entry *dst = __sk_dst_check(sk, 0);
307 
308         if (dst)
309                 dst->ops->redirect(dst, sk, skb);
310 }
311 
312 
313 /* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
314 void tcp_req_err(struct sock *sk, u32 seq, bool abort)
315 {
316         struct request_sock *req = inet_reqsk(sk);
317         struct net *net = sock_net(sk);
318 
319         /* ICMPs are not backlogged, hence we cannot get
320          * an established socket here.
321          */
322         if (seq != tcp_rsk(req)->snt_isn) {
323                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
324         } else if (abort) {
325                 /*
326                  * Still in SYN_RECV, just remove it silently.
327                  * There is no good way to pass the error to the newly
328                  * created socket, and POSIX does not want network
329                  * errors returned from accept().
330                  */
331                 inet_csk_reqsk_queue_drop(req->rsk_listener, req);
332                 tcp_listendrop(req->rsk_listener);
333         }
334         reqsk_put(req);
335 }
336 EXPORT_SYMBOL(tcp_req_err);
337 
338 /*
339  * This routine is called by the ICMP module when it gets some
340  * sort of error condition.  If err < 0 then the socket should
341  * be closed and the error returned to the user.  If err > 0
342  * it's just the icmp type << 8 | icmp code.  After adjustment
343  * header points to the first 8 bytes of the tcp header.  We need
344  * to find the appropriate port.
345  *
346  * The locking strategy used here is very "optimistic". When
347  * someone else accesses the socket the ICMP is just dropped
348  * and for some paths there is no check at all.
349  * A more general error queue to queue errors for later handling
350  * is probably better.
351  *
352  */
353 
354 void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
355 {
356         const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
357         struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
358         struct inet_connection_sock *icsk;
359         struct tcp_sock *tp;
360         struct inet_sock *inet;
361         const int type = icmp_hdr(icmp_skb)->type;
362         const int code = icmp_hdr(icmp_skb)->code;
363         struct sock *sk;
364         struct sk_buff *skb;
365         struct request_sock *fastopen;
366         __u32 seq, snd_una;
367         __u32 remaining;
368         int err;
369         struct net *net = dev_net(icmp_skb->dev);
370 
371         sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
372                                        th->dest, iph->saddr, ntohs(th->source),
373                                        inet_iif(icmp_skb));
374         if (!sk) {
375                 __ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
376                 return;
377         }
378         if (sk->sk_state == TCP_TIME_WAIT) {
379                 inet_twsk_put(inet_twsk(sk));
380                 return;
381         }
382         seq = ntohl(th->seq);
383         if (sk->sk_state == TCP_NEW_SYN_RECV)
384                 return tcp_req_err(sk, seq,
385                                   type == ICMP_PARAMETERPROB ||
386                                   type == ICMP_TIME_EXCEEDED ||
387                                   (type == ICMP_DEST_UNREACH &&
388                                    (code == ICMP_NET_UNREACH ||
389                                     code == ICMP_HOST_UNREACH)));
390 
391         bh_lock_sock(sk);
392         /* If too many ICMPs get dropped on busy
393          * servers this needs to be solved differently.
394          * We do take care of PMTU discovery (RFC1191) special case :
395          * we can receive locally generated ICMP messages while socket is held.
396          */
397         if (sock_owned_by_user(sk)) {
398                 if (!(type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED))
399                         __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
400         }
401         if (sk->sk_state == TCP_CLOSE)
402                 goto out;
403 
404         if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
405                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
406                 goto out;
407         }
408 
409         icsk = inet_csk(sk);
410         tp = tcp_sk(sk);
411         /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
412         fastopen = tp->fastopen_rsk;
413         snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
414         if (sk->sk_state != TCP_LISTEN &&
415             !between(seq, snd_una, tp->snd_nxt)) {
416                 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
417                 goto out;
418         }
419 
420         switch (type) {
421         case ICMP_REDIRECT:
422                 do_redirect(icmp_skb, sk);
423                 goto out;
424         case ICMP_SOURCE_QUENCH:
425                 /* Just silently ignore these. */
426                 goto out;
427         case ICMP_PARAMETERPROB:
428                 err = EPROTO;
429                 break;
430         case ICMP_DEST_UNREACH:
431                 if (code > NR_ICMP_UNREACH)
432                         goto out;
433 
434                 if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
435                         /* We are not interested in TCP_LISTEN and open_requests
436                          * (SYN-ACKs send out by Linux are always <576bytes so
437                          * they should go through unfragmented).
438                          */
439                         if (sk->sk_state == TCP_LISTEN)
440                                 goto out;
441 
442                         tp->mtu_info = info;
443                         if (!sock_owned_by_user(sk)) {
444                                 tcp_v4_mtu_reduced(sk);
445                         } else {
446                                 if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags))
447                                         sock_hold(sk);
448                         }
449                         goto out;
450                 }
451 
452                 err = icmp_err_convert[code].errno;
453                 /* check if icmp_skb allows revert of backoff
454                  * (see draft-zimmermann-tcp-lcd) */
455                 if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
456                         break;
457                 if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
458                     !icsk->icsk_backoff || fastopen)
459                         break;
460 
461                 if (sock_owned_by_user(sk))
462                         break;
463 
464                 icsk->icsk_backoff--;
465                 icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
466                                                TCP_TIMEOUT_INIT;
467                 icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
468 
469                 skb = tcp_write_queue_head(sk);
470                 BUG_ON(!skb);
471 
472                 remaining = icsk->icsk_rto -
473                             min(icsk->icsk_rto,
474                                 tcp_time_stamp - tcp_skb_timestamp(skb));
475 
476                 if (remaining) {
477                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
478                                                   remaining, TCP_RTO_MAX);
479                 } else {
480                         /* RTO revert clocked out retransmission.
481                          * Will retransmit now */
482                         tcp_retransmit_timer(sk);
483                 }
484 
485                 break;
486         case ICMP_TIME_EXCEEDED:
487                 err = EHOSTUNREACH;
488                 break;
489         default:
490                 goto out;
491         }
492 
493         switch (sk->sk_state) {
494         case TCP_SYN_SENT:
495         case TCP_SYN_RECV:
496                 /* Only in fast or simultaneous open. If a fast open socket is
497                  * is already accepted it is treated as a connected one below.
498                  */
499                 if (fastopen && !fastopen->sk)
500                         break;
501 
502                 if (!sock_owned_by_user(sk)) {
503                         sk->sk_err = err;
504 
505                         sk->sk_error_report(sk);
506 
507                         tcp_done(sk);
508                 } else {
509                         sk->sk_err_soft = err;
510                 }
511                 goto out;
512         }
513 
514         /* If we've already connected we will keep trying
515          * until we time out, or the user gives up.
516          *
517          * rfc1122 4.2.3.9 allows to consider as hard errors
518          * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
519          * but it is obsoleted by pmtu discovery).
520          *
521          * Note, that in modern internet, where routing is unreliable
522          * and in each dark corner broken firewalls sit, sending random
523          * errors ordered by their masters even this two messages finally lose
524          * their original sense (even Linux sends invalid PORT_UNREACHs)
525          *
526          * Now we are in compliance with RFCs.
527          *                                                      --ANK (980905)
528          */
529 
530         inet = inet_sk(sk);
531         if (!sock_owned_by_user(sk) && inet->recverr) {
532                 sk->sk_err = err;
533                 sk->sk_error_report(sk);
534         } else  { /* Only an error on timeout */
535                 sk->sk_err_soft = err;
536         }
537 
538 out:
539         bh_unlock_sock(sk);
540         sock_put(sk);
541 }
542 
543 void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr)
544 {
545         struct tcphdr *th = tcp_hdr(skb);
546 
547         if (skb->ip_summed == CHECKSUM_PARTIAL) {
548                 th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0);
549                 skb->csum_start = skb_transport_header(skb) - skb->head;
550                 skb->csum_offset = offsetof(struct tcphdr, check);
551         } else {
552                 th->check = tcp_v4_check(skb->len, saddr, daddr,
553                                          csum_partial(th,
554                                                       th->doff << 2,
555                                                       skb->csum));
556         }
557 }
558 
559 /* This routine computes an IPv4 TCP checksum. */
560 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)
561 {
562         const struct inet_sock *inet = inet_sk(sk);
563 
564         __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr);
565 }
566 EXPORT_SYMBOL(tcp_v4_send_check);
567 
568 /*
569  *      This routine will send an RST to the other tcp.
570  *
571  *      Someone asks: why I NEVER use socket parameters (TOS, TTL etc.)
572  *                    for reset.
573  *      Answer: if a packet caused RST, it is not for a socket
574  *              existing in our system, if it is matched to a socket,
575  *              it is just duplicate segment or bug in other side's TCP.
576  *              So that we build reply only basing on parameters
577  *              arrived with segment.
578  *      Exception: precedence violation. We do not implement it in any case.
579  */
580 
581 static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
582 {
583         const struct tcphdr *th = tcp_hdr(skb);
584         struct {
585                 struct tcphdr th;
586 #ifdef CONFIG_TCP_MD5SIG
587                 __be32 opt[(TCPOLEN_MD5SIG_ALIGNED >> 2)];
588 #endif
589         } rep;
590         struct ip_reply_arg arg;
591 #ifdef CONFIG_TCP_MD5SIG
592         struct tcp_md5sig_key *key = NULL;
593         const __u8 *hash_location = NULL;
594         unsigned char newhash[16];
595         int genhash;
596         struct sock *sk1 = NULL;
597 #endif
598         struct net *net;
599 
600         /* Never send a reset in response to a reset. */
601         if (th->rst)
602                 return;
603 
604         /* If sk not NULL, it means we did a successful lookup and incoming
605          * route had to be correct. prequeue might have dropped our dst.
606          */
607         if (!sk && skb_rtable(skb)->rt_type != RTN_LOCAL)
608                 return;
609 
610         /* Swap the send and the receive. */
611         memset(&rep, 0, sizeof(rep));
612         rep.th.dest   = th->source;
613         rep.th.source = th->dest;
614         rep.th.doff   = sizeof(struct tcphdr) / 4;
615         rep.th.rst    = 1;
616 
617         if (th->ack) {
618                 rep.th.seq = th->ack_seq;
619         } else {
620                 rep.th.ack = 1;
621                 rep.th.ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin +
622                                        skb->len - (th->doff << 2));
623         }
624 
625         memset(&arg, 0, sizeof(arg));
626         arg.iov[0].iov_base = (unsigned char *)&rep;
627         arg.iov[0].iov_len  = sizeof(rep.th);
628 
629         net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
630 #ifdef CONFIG_TCP_MD5SIG
631         rcu_read_lock();
632         hash_location = tcp_parse_md5sig_option(th);
633         if (sk && sk_fullsock(sk)) {
634                 key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)
635                                         &ip_hdr(skb)->saddr, AF_INET);
636         } else if (hash_location) {
637                 /*
638                  * active side is lost. Try to find listening socket through
639                  * source port, and then find md5 key through listening socket.
640                  * we are not loose security here:
641                  * Incoming packet is checked with md5 hash with finding key,
642                  * no RST generated if md5 hash doesn't match.
643                  */
644                 sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0,
645                                              ip_hdr(skb)->saddr,
646                                              th->source, ip_hdr(skb)->daddr,
647                                              ntohs(th->source), inet_iif(skb));
648                 /* don't send rst if it can't find key */
649                 if (!sk1)
650                         goto out;
651 
652                 key = tcp_md5_do_lookup(sk1, (union tcp_md5_addr *)
653                                         &ip_hdr(skb)->saddr, AF_INET);
654                 if (!key)
655                         goto out;
656 
657 
658                 genhash = tcp_v4_md5_hash_skb(newhash, key, NULL, skb);
659                 if (genhash || memcmp(hash_location, newhash, 16) != 0)
660                         goto out;
661 
662         }
663 
664         if (key) {
665                 rep.opt[0] = htonl((TCPOPT_NOP << 24) |
666                                    (TCPOPT_NOP << 16) |
667                                    (TCPOPT_MD5SIG << 8) |
668                                    TCPOLEN_MD5SIG);
669                 /* Update length and the length the header thinks exists */
670                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
671                 rep.th.doff = arg.iov[0].iov_len / 4;
672 
673                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1],
674                                      key, ip_hdr(skb)->saddr,
675                                      ip_hdr(skb)->daddr, &rep.th);
676         }
677 #endif
678         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
679                                       ip_hdr(skb)->saddr, /* XXX */
680                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
681         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
682         arg.flags = (sk && inet_sk_transparent(sk)) ? IP_REPLY_ARG_NOSRCCHECK : 0;
683 
684         /* When socket is gone, all binding information is lost.
685          * routing might fail in this case. No choice here, if we choose to force
686          * input interface, we will misroute in case of asymmetric route.
687          */
688         if (sk)
689                 arg.bound_dev_if = sk->sk_bound_dev_if;
690 
691         BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) !=
692                      offsetof(struct inet_timewait_sock, tw_bound_dev_if));
693 
694         arg.tos = ip_hdr(skb)->tos;
695         local_bh_disable();
696         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
697                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
698                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
699                               &arg, arg.iov[0].iov_len);
700 
701         __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
702         __TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
703         local_bh_enable();
704 
705 #ifdef CONFIG_TCP_MD5SIG
706 out:
707         rcu_read_unlock();
708 #endif
709 }
710 
711 /* The code following below sending ACKs in SYN-RECV and TIME-WAIT states
712    outside socket context is ugly, certainly. What can I do?
713  */
714 
715 static void tcp_v4_send_ack(struct net *net,
716                             struct sk_buff *skb, u32 seq, u32 ack,
717                             u32 win, u32 tsval, u32 tsecr, int oif,
718                             struct tcp_md5sig_key *key,
719                             int reply_flags, u8 tos)
720 {
721         const struct tcphdr *th = tcp_hdr(skb);
722         struct {
723                 struct tcphdr th;
724                 __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2)
725 #ifdef CONFIG_TCP_MD5SIG
726                            + (TCPOLEN_MD5SIG_ALIGNED >> 2)
727 #endif
728                         ];
729         } rep;
730         struct ip_reply_arg arg;
731 
732         memset(&rep.th, 0, sizeof(struct tcphdr));
733         memset(&arg, 0, sizeof(arg));
734 
735         arg.iov[0].iov_base = (unsigned char *)&rep;
736         arg.iov[0].iov_len  = sizeof(rep.th);
737         if (tsecr) {
738                 rep.opt[0] = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
739                                    (TCPOPT_TIMESTAMP << 8) |
740                                    TCPOLEN_TIMESTAMP);
741                 rep.opt[1] = htonl(tsval);
742                 rep.opt[2] = htonl(tsecr);
743                 arg.iov[0].iov_len += TCPOLEN_TSTAMP_ALIGNED;
744         }
745 
746         /* Swap the send and the receive. */
747         rep.th.dest    = th->source;
748         rep.th.source  = th->dest;
749         rep.th.doff    = arg.iov[0].iov_len / 4;
750         rep.th.seq     = htonl(seq);
751         rep.th.ack_seq = htonl(ack);
752         rep.th.ack     = 1;
753         rep.th.window  = htons(win);
754 
755 #ifdef CONFIG_TCP_MD5SIG
756         if (key) {
757                 int offset = (tsecr) ? 3 : 0;
758 
759                 rep.opt[offset++] = htonl((TCPOPT_NOP << 24) |
760                                           (TCPOPT_NOP << 16) |
761                                           (TCPOPT_MD5SIG << 8) |
762                                           TCPOLEN_MD5SIG);
763                 arg.iov[0].iov_len += TCPOLEN_MD5SIG_ALIGNED;
764                 rep.th.doff = arg.iov[0].iov_len/4;
765 
766                 tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[offset],
767                                     key, ip_hdr(skb)->saddr,
768                                     ip_hdr(skb)->daddr, &rep.th);
769         }
770 #endif
771         arg.flags = reply_flags;
772         arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr,
773                                       ip_hdr(skb)->saddr, /* XXX */
774                                       arg.iov[0].iov_len, IPPROTO_TCP, 0);
775         arg.csumoffset = offsetof(struct tcphdr, check) / 2;
776         if (oif)
777                 arg.bound_dev_if = oif;
778         arg.tos = tos;
779         local_bh_disable();
780         ip_send_unicast_reply(*this_cpu_ptr(net->ipv4.tcp_sk),
781                               skb, &TCP_SKB_CB(skb)->header.h4.opt,
782                               ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
783                               &arg, arg.iov[0].iov_len);
784 
785         __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
786         local_bh_enable();
787 }
788 
789 static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
790 {
791         struct inet_timewait_sock *tw = inet_twsk(sk);
792         struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
793 
794         tcp_v4_send_ack(sock_net(sk), skb,
795                         tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
796                         tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
797                         tcp_time_stamp + tcptw->tw_ts_offset,
798                         tcptw->tw_ts_recent,
799                         tw->tw_bound_dev_if,
800                         tcp_twsk_md5_key(tcptw),
801                         tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
802                         tw->tw_tos
803                         );
804 
805         inet_twsk_put(tw);
806 }
807 
808 static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
809                                   struct request_sock *req)
810 {
811         /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
812          * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
813          */
814         u32 seq = (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 :
815                                              tcp_sk(sk)->snd_nxt;
816 
817         /* RFC 7323 2.3
818          * The window field (SEG.WND) of every outgoing segment, with the
819          * exception of <SYN> segments, MUST be right-shifted by
820          * Rcv.Wind.Shift bits:
821          */
822         tcp_v4_send_ack(sock_net(sk), skb, seq,
823                         tcp_rsk(req)->rcv_nxt,
824                         req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
825                         tcp_time_stamp,
826                         req->ts_recent,
827                         0,
828                         tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr,
829                                           AF_INET),
830                         inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
831                         ip_hdr(skb)->tos);
832 }
833 
834 /*
835  *      Send a SYN-ACK after having received a SYN.
836  *      This still operates on a request_sock only, not on a big
837  *      socket.
838  */
839 static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
840                               struct flowi *fl,
841                               struct request_sock *req,
842                               struct tcp_fastopen_cookie *foc,
843                               enum tcp_synack_type synack_type)
844 {
845         const struct inet_request_sock *ireq = inet_rsk(req);
846         struct flowi4 fl4;
847         int err = -1;
848         struct sk_buff *skb;
849 
850         /* First, grab a route. */
851         if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
852                 return -1;
853 
854         skb = tcp_make_synack(sk, dst, req, foc, synack_type);
855 
856         if (skb) {
857                 __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
858 
859                 err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
860                                             ireq->ir_rmt_addr,
861                                             ireq->opt);
862                 err = net_xmit_eval(err);
863         }
864 
865         return err;
866 }
867 
868 /*
869  *      IPv4 request_sock destructor.
870  */
871 static void tcp_v4_reqsk_destructor(struct request_sock *req)
872 {
873         kfree(inet_rsk(req)->opt);
874 }
875 
876 #ifdef CONFIG_TCP_MD5SIG
877 /*
878  * RFC2385 MD5 checksumming requires a mapping of
879  * IP address->MD5 Key.
880  * We need to maintain these in the sk structure.
881  */
882 
883 /* Find the Key structure for an address.  */
884 struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk,
885                                          const union tcp_md5_addr *addr,
886                                          int family)
887 {
888         const struct tcp_sock *tp = tcp_sk(sk);
889         struct tcp_md5sig_key *key;
890         unsigned int size = sizeof(struct in_addr);
891         const struct tcp_md5sig_info *md5sig;
892 
893         /* caller either holds rcu_read_lock() or socket lock */
894         md5sig = rcu_dereference_check(tp->md5sig_info,
895                                        lockdep_sock_is_held(sk));
896         if (!md5sig)
897                 return NULL;
898 #if IS_ENABLED(CONFIG_IPV6)
899         if (family == AF_INET6)
900                 size = sizeof(struct in6_addr);
901 #endif
902         hlist_for_each_entry_rcu(key, &md5sig->head, node) {
903                 if (key->family != family)
904                         continue;
905                 if (!memcmp(&key->addr, addr, size))
906                         return key;
907         }
908         return NULL;
909 }
910 EXPORT_SYMBOL(tcp_md5_do_lookup);
911 
912 struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
913                                          const struct sock *addr_sk)
914 {
915         const union tcp_md5_addr *addr;
916 
917         addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr;
918         return tcp_md5_do_lookup(sk, addr, AF_INET);
919 }
920 EXPORT_SYMBOL(tcp_v4_md5_lookup);
921 
922 /* This can be called on a newly created socket, from other files */
923 int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
924                    int family, const u8 *newkey, u8 newkeylen, gfp_t gfp)
925 {
926         /* Add Key to the list */
927         struct tcp_md5sig_key *key;
928         struct tcp_sock *tp = tcp_sk(sk);
929         struct tcp_md5sig_info *md5sig;
930 
931         key = tcp_md5_do_lookup(sk, addr, family);
932         if (key) {
933                 /* Pre-existing entry - just update that one. */
934                 memcpy(key->key, newkey, newkeylen);
935                 key->keylen = newkeylen;
936                 return 0;
937         }
938 
939         md5sig = rcu_dereference_protected(tp->md5sig_info,
940                                            lockdep_sock_is_held(sk));
941         if (!md5sig) {
942                 md5sig = kmalloc(sizeof(*md5sig), gfp);
943                 if (!md5sig)
944                         return -ENOMEM;
945 
946                 sk_nocaps_add(sk, NETIF_F_GSO_MASK);
947                 INIT_HLIST_HEAD(&md5sig->head);
948                 rcu_assign_pointer(tp->md5sig_info, md5sig);
949         }
950 
951         key = sock_kmalloc(sk, sizeof(*key), gfp);
952         if (!key)
953                 return -ENOMEM;
954         if (!tcp_alloc_md5sig_pool()) {
955                 sock_kfree_s(sk, key, sizeof(*key));
956                 return -ENOMEM;
957         }
958 
959         memcpy(key->key, newkey, newkeylen);
960         key->keylen = newkeylen;
961         key->family = family;
962         memcpy(&key->addr, addr,
963                (family == AF_INET6) ? sizeof(struct in6_addr) :
964                                       sizeof(struct in_addr));
965         hlist_add_head_rcu(&key->node, &md5sig->head);
966         return 0;
967 }
968 EXPORT_SYMBOL(tcp_md5_do_add);
969 
970 int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family)
971 {
972         struct tcp_md5sig_key *key;
973 
974         key = tcp_md5_do_lookup(sk, addr, family);
975         if (!key)
976                 return -ENOENT;
977         hlist_del_rcu(&key->node);
978         atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
979         kfree_rcu(key, rcu);
980         return 0;
981 }
982 EXPORT_SYMBOL(tcp_md5_do_del);
983 
984 static void tcp_clear_md5_list(struct sock *sk)
985 {
986         struct tcp_sock *tp = tcp_sk(sk);
987         struct tcp_md5sig_key *key;
988         struct hlist_node *n;
989         struct tcp_md5sig_info *md5sig;
990 
991         md5sig = rcu_dereference_protected(tp->md5sig_info, 1);
992 
993         hlist_for_each_entry_safe(key, n, &md5sig->head, node) {
994                 hlist_del_rcu(&key->node);
995                 atomic_sub(sizeof(*key), &sk->sk_omem_alloc);
996                 kfree_rcu(key, rcu);
997         }
998 }
999 
1000 static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval,
1001                                  int optlen)
1002 {
1003         struct tcp_md5sig cmd;
1004         struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr;
1005 
1006         if (optlen < sizeof(cmd))
1007                 return -EINVAL;
1008 
1009         if (copy_from_user(&cmd, optval, sizeof(cmd)))
1010                 return -EFAULT;
1011 
1012         if (sin->sin_family != AF_INET)
1013                 return -EINVAL;
1014 
1015         if (!cmd.tcpm_keylen)
1016                 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1017                                       AF_INET);
1018 
1019         if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
1020                 return -EINVAL;
1021 
1022         return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr,
1023                               AF_INET, cmd.tcpm_key, cmd.tcpm_keylen,
1024                               GFP_KERNEL);
1025 }
1026 
1027 static int tcp_v4_md5_hash_headers(struct tcp_md5sig_pool *hp,
1028                                    __be32 daddr, __be32 saddr,
1029                                    const struct tcphdr *th, int nbytes)
1030 {
1031         struct tcp4_pseudohdr *bp;
1032         struct scatterlist sg;
1033         struct tcphdr *_th;
1034 
1035         bp = hp->scratch;
1036         bp->saddr = saddr;
1037         bp->daddr = daddr;
1038         bp->pad = 0;
1039         bp->protocol = IPPROTO_TCP;
1040         bp->len = cpu_to_be16(nbytes);
1041 
1042         _th = (struct tcphdr *)(bp + 1);
1043         memcpy(_th, th, sizeof(*th));
1044         _th->check = 0;
1045 
1046         sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
1047         ahash_request_set_crypt(hp->md5_req, &sg, NULL,
1048                                 sizeof(*bp) + sizeof(*th));
1049         return crypto_ahash_update(hp->md5_req);
1050 }
1051 
1052 static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
1053                                __be32 daddr, __be32 saddr, const struct tcphdr *th)
1054 {
1055         struct tcp_md5sig_pool *hp;
1056         struct ahash_request *req;
1057 
1058         hp = tcp_get_md5sig_pool();
1059         if (!hp)
1060                 goto clear_hash_noput;
1061         req = hp->md5_req;
1062 
1063         if (crypto_ahash_init(req))
1064                 goto clear_hash;
1065         if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
1066                 goto clear_hash;
1067         if (tcp_md5_hash_key(hp, key))
1068                 goto clear_hash;
1069         ahash_request_set_crypt(req, NULL, md5_hash, 0);
1070         if (crypto_ahash_final(req))
1071                 goto clear_hash;
1072 
1073         tcp_put_md5sig_pool();
1074         return 0;
1075 
1076 clear_hash:
1077         tcp_put_md5sig_pool();
1078 clear_hash_noput:
1079         memset(md5_hash, 0, 16);
1080         return 1;
1081 }
1082 
1083 int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
1084                         const struct sock *sk,
1085                         const struct sk_buff *skb)
1086 {
1087         struct tcp_md5sig_pool *hp;
1088         struct ahash_request *req;
1089         const struct tcphdr *th = tcp_hdr(skb);
1090         __be32 saddr, daddr;
1091 
1092         if (sk) { /* valid for establish/request sockets */
1093                 saddr = sk->sk_rcv_saddr;
1094                 daddr = sk->sk_daddr;
1095         } else {
1096                 const struct iphdr *iph = ip_hdr(skb);
1097                 saddr = iph->saddr;
1098                 daddr = iph->daddr;
1099         }
1100 
1101         hp = tcp_get_md5sig_pool();
1102         if (!hp)
1103                 goto clear_hash_noput;
1104         req = hp->md5_req;
1105 
1106         if (crypto_ahash_init(req))
1107                 goto clear_hash;
1108 
1109         if (tcp_v4_md5_hash_headers(hp, daddr, saddr, th, skb->len))
1110                 goto clear_hash;
1111         if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
1112                 goto clear_hash;
1113         if (tcp_md5_hash_key(hp, key))
1114                 goto clear_hash;
1115         ahash_request_set_crypt(req, NULL, md5_hash, 0);
1116         if (crypto_ahash_final(req))
1117                 goto clear_hash;
1118 
1119         tcp_put_md5sig_pool();
1120         return 0;
1121 
1122 clear_hash:
1123         tcp_put_md5sig_pool();
1124 clear_hash_noput:
1125         memset(md5_hash, 0, 16);
1126         return 1;
1127 }
1128 EXPORT_SYMBOL(tcp_v4_md5_hash_skb);
1129 
1130 #endif
1131 
1132 /* Called with rcu_read_lock() */
1133 static bool tcp_v4_inbound_md5_hash(const struct sock *sk,
1134                                     const struct sk_buff *skb)
1135 {
1136 #ifdef CONFIG_TCP_MD5SIG
1137         /*
1138          * This gets called for each TCP segment that arrives
1139          * so we want to be efficient.
1140          * We have 3 drop cases:
1141          * o No MD5 hash and one expected.
1142          * o MD5 hash and we're not expecting one.
1143          * o MD5 hash and its wrong.
1144          */
1145         const __u8 *hash_location = NULL;
1146         struct tcp_md5sig_key *hash_expected;
1147         const struct iphdr *iph = ip_hdr(skb);
1148         const struct tcphdr *th = tcp_hdr(skb);
1149         int genhash;
1150         unsigned char newhash[16];
1151 
1152         hash_expected = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&iph->saddr,
1153                                           AF_INET);
1154         hash_location = tcp_parse_md5sig_option(th);
1155 
1156         /* We've parsed the options - do we have a hash? */
1157         if (!hash_expected && !hash_location)
1158                 return false;
1159 
1160         if (hash_expected && !hash_location) {
1161                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
1162                 return true;
1163         }
1164 
1165         if (!hash_expected && hash_location) {
1166                 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
1167                 return true;
1168         }
1169 
1170         /* Okay, so this is hash_expected and hash_location -
1171          * so we need to calculate the checksum.
1172          */
1173         genhash = tcp_v4_md5_hash_skb(newhash,
1174                                       hash_expected,
1175                                       NULL, skb);
1176 
1177         if (genhash || memcmp(hash_location, newhash, 16) != 0) {
1178                 net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n",
1179                                      &iph->saddr, ntohs(th->source),
1180                                      &iph->daddr, ntohs(th->dest),
1181                                      genhash ? " tcp_v4_calc_md5_hash failed"
1182                                      : "");
1183                 return true;
1184         }
1185         return false;
1186 #endif
1187         return false;
1188 }
1189 
1190 static void tcp_v4_init_req(struct request_sock *req,
1191                             const struct sock *sk_listener,
1192                             struct sk_buff *skb)
1193 {
1194         struct inet_request_sock *ireq = inet_rsk(req);
1195 
1196         sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
1197         sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
1198         ireq->no_srccheck = inet_sk(sk_listener)->transparent;
1199         ireq->opt = tcp_v4_save_options(skb);
1200 }
1201 
1202 static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
1203                                           struct flowi *fl,
1204                                           const struct request_sock *req,
1205                                           bool *strict)
1206 {
1207         struct dst_entry *dst = inet_csk_route_req(sk, &fl->u.ip4, req);
1208 
1209         if (strict) {
1210                 if (fl->u.ip4.daddr == inet_rsk(req)->ir_rmt_addr)
1211                         *strict = true;
1212                 else
1213                         *strict = false;
1214         }
1215 
1216         return dst;
1217 }
1218 
1219 struct request_sock_ops tcp_request_sock_ops __read_mostly = {
1220         .family         =       PF_INET,
1221         .obj_size       =       sizeof(struct tcp_request_sock),
1222         .rtx_syn_ack    =       tcp_rtx_synack,
1223         .send_ack       =       tcp_v4_reqsk_send_ack,
1224         .destructor     =       tcp_v4_reqsk_destructor,
1225         .send_reset     =       tcp_v4_send_reset,
1226         .syn_ack_timeout =      tcp_syn_ack_timeout,
1227 };
1228 
1229 static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
1230         .mss_clamp      =       TCP_MSS_DEFAULT,
1231 #ifdef CONFIG_TCP_MD5SIG
1232         .req_md5_lookup =       tcp_v4_md5_lookup,
1233         .calc_md5_hash  =       tcp_v4_md5_hash_skb,
1234 #endif
1235         .init_req       =       tcp_v4_init_req,
1236 #ifdef CONFIG_SYN_COOKIES
1237         .cookie_init_seq =      cookie_v4_init_sequence,
1238 #endif
1239         .route_req      =       tcp_v4_route_req,
1240         .init_seq       =       tcp_v4_init_sequence,
1241         .send_synack    =       tcp_v4_send_synack,
1242 };
1243 
1244 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
1245 {
1246         /* Never answer to SYNs send to broadcast or multicast */
1247         if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
1248                 goto drop;
1249 
1250         return tcp_conn_request(&tcp_request_sock_ops,
1251                                 &tcp_request_sock_ipv4_ops, sk, skb);
1252 
1253 drop:
1254         tcp_listendrop(sk);
1255         return 0;
1256 }
1257 EXPORT_SYMBOL(tcp_v4_conn_request);
1258 
1259 
1260 /*
1261  * The three way handshake has completed - we got a valid synack -
1262  * now create the new socket.
1263  */
1264 struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1265                                   struct request_sock *req,
1266                                   struct dst_entry *dst,
1267                                   struct request_sock *req_unhash,
1268                                   bool *own_req)
1269 {
1270         struct inet_request_sock *ireq;
1271         struct inet_sock *newinet;
1272         struct tcp_sock *newtp;
1273         struct sock *newsk;
1274 #ifdef CONFIG_TCP_MD5SIG
1275         struct tcp_md5sig_key *key;
1276 #endif
1277         struct ip_options_rcu *inet_opt;
1278 
1279         if (sk_acceptq_is_full(sk))
1280                 goto exit_overflow;
1281 
1282         newsk = tcp_create_openreq_child(sk, req, skb);
1283         if (!newsk)
1284                 goto exit_nonewsk;
1285 
1286         newsk->sk_gso_type = SKB_GSO_TCPV4;
1287         inet_sk_rx_dst_set(newsk, skb);
1288 
1289         newtp                 = tcp_sk(newsk);
1290         newinet               = inet_sk(newsk);
1291         ireq                  = inet_rsk(req);
1292         sk_daddr_set(newsk, ireq->ir_rmt_addr);
1293         sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
1294         newsk->sk_bound_dev_if = ireq->ir_iif;
1295         newinet->inet_saddr           = ireq->ir_loc_addr;
1296         inet_opt              = ireq->opt;
1297         rcu_assign_pointer(newinet->inet_opt, inet_opt);
1298         ireq->opt             = NULL;
1299         newinet->mc_index     = inet_iif(skb);
1300         newinet->mc_ttl       = ip_hdr(skb)->ttl;
1301         newinet->rcv_tos      = ip_hdr(skb)->tos;
1302         inet_csk(newsk)->icsk_ext_hdr_len = 0;
1303         if (inet_opt)
1304                 inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
1305         newinet->inet_id = newtp->write_seq ^ jiffies;
1306 
1307         if (!dst) {
1308                 dst = inet_csk_route_child_sock(sk, newsk, req);
1309                 if (!dst)
1310                         goto put_and_exit;
1311         } else {
1312                 /* syncookie case : see end of cookie_v4_check() */
1313         }
1314         sk_setup_caps(newsk, dst);
1315 
1316         tcp_ca_openreq_child(newsk, dst);
1317 
1318         tcp_sync_mss(newsk, dst_mtu(dst));
1319         newtp->advmss = dst_metric_advmss(dst);
1320         if (tcp_sk(sk)->rx_opt.user_mss &&
1321             tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
1322                 newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;
1323 
1324         tcp_initialize_rcv_mss(newsk);
1325 
1326 #ifdef CONFIG_TCP_MD5SIG
1327         /* Copy over the MD5 key from the original socket */
1328         key = tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&newinet->inet_daddr,
1329                                 AF_INET);
1330         if (key) {
1331                 /*
1332                  * We're using one, so create a matching key
1333                  * on the newsk structure. If we fail to get
1334                  * memory, then we end up not copying the key
1335                  * across. Shucks.
1336                  */
1337                 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr,
1338                                AF_INET, key->key, key->keylen, GFP_ATOMIC);
1339                 sk_nocaps_add(newsk, NETIF_F_GSO_MASK);
1340         }
1341 #endif
1342 
1343         if (__inet_inherit_port(sk, newsk) < 0)
1344                 goto put_and_exit;
1345         *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1346         if (*own_req)
1347                 tcp_move_syn(newtp, req);
1348 
1349         return newsk;
1350 
1351 exit_overflow:
1352         NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1353 exit_nonewsk:
1354         dst_release(dst);
1355 exit:
1356         tcp_listendrop(sk);
1357         return NULL;
1358 put_and_exit:
1359         inet_csk_prepare_forced_close(newsk);
1360         tcp_done(newsk);
1361         goto exit;
1362 }
1363 EXPORT_SYMBOL(tcp_v4_syn_recv_sock);
1364 
/* With CONFIG_SYN_COOKIES, pass non-SYN segments through
 * cookie_v4_check() and return its result; otherwise return @sk unchanged.
 */
static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
        if (!tcp_hdr(skb)->syn)
                return cookie_v4_check(sk, skb);
#endif
        return sk;
}
1375 
1376 /* The socket must have it's spinlock held when we get
1377  * here, unless it is a TCP_LISTEN socket.
1378  *
1379  * We have a potential double-lock case here, so even when
1380  * doing backlog processing we use the BH locking scheme.
1381  * This is because we cannot sleep with the original spinlock
1382  * held.
1383  */
1384 int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
1385 {
1386         struct sock *rsk;
1387 
1388         if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1389                 struct dst_entry *dst = sk->sk_rx_dst;
1390 
1391                 sock_rps_save_rxhash(sk, skb);
1392                 sk_mark_napi_id(sk, skb);
1393                 if (dst) {
1394                         if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1395                             !dst->ops->check(dst, 0)) {
1396                                 dst_release(dst);
1397                                 sk->sk_rx_dst = NULL;
1398                         }
1399                 }
1400                 tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len);
1401                 return 0;
1402         }
1403 
1404         if (tcp_checksum_complete(skb))
1405                 goto csum_err;
1406 
1407         if (sk->sk_state == TCP_LISTEN) {
1408                 struct sock *nsk = tcp_v4_cookie_check(sk, skb);
1409 
1410                 if (!nsk)
1411                         goto discard;
1412                 if (nsk != sk) {
1413                         sock_rps_save_rxhash(nsk, skb);
1414                         sk_mark_napi_id(nsk, skb);
1415                         if (tcp_child_process(sk, nsk, skb)) {
1416                                 rsk = nsk;
1417                                 goto reset;
1418                         }
1419                         return 0;
1420                 }
1421         } else
1422                 sock_rps_save_rxhash(sk, skb);
1423 
1424         if (tcp_rcv_state_process(sk, skb)) {
1425                 rsk = sk;
1426                 goto reset;
1427         }
1428         return 0;
1429 
1430 reset:
1431         tcp_v4_send_reset(rsk, skb);
1432 discard:
1433         kfree_skb(skb);
1434         /* Be careful here. If this function gets more complicated and
1435          * gcc suffers from register pressure on the x86, sk (in %ebx)
1436          * might be destroyed here. This current version compiles correctly,
1437          * but you have been warned.
1438          */
1439         return 0;
1440 
1441 csum_err:
1442         TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1443         TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1444         goto discard;
1445 }
1446 EXPORT_SYMBOL(tcp_v4_do_rcv);
1447 
1448 void tcp_v4_early_demux(struct sk_buff *skb)
1449 {
1450         const struct iphdr *iph;
1451         const struct tcphdr *th;
1452         struct sock *sk;
1453 
1454         if (skb->pkt_type != PACKET_HOST)
1455                 return;
1456 
1457         if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1458                 return;
1459 
1460         iph = ip_hdr(skb);
1461         th = tcp_hdr(skb);
1462 
1463         if (th->doff < sizeof(struct tcphdr) / 4)
1464                 return;
1465 
1466         sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1467                                        iph->saddr, th->source,
1468                                        iph->daddr, ntohs(th->dest),
1469                                        skb->skb_iif);
1470         if (sk) {
1471                 skb->sk = sk;
1472                 skb->destructor = sock_edemux;
1473                 if (sk_fullsock(sk)) {
1474                         struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1475 
1476                         if (dst)
1477                                 dst = dst_check(dst, 0);
1478                         if (dst &&
1479                             inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1480                                 skb_dst_set_noref(skb, dst);
1481                 }
1482         }
1483 }
1484 
1485 /* Packet is added to VJ-style prequeue for processing in process
1486  * context, if a reader task is waiting. Apparently, this exciting
1487  * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93)
1488  * failed somewhere. Latency? Burstiness? Well, at least now we will
1489  * see, why it failed. 8)8)                               --ANK
1490  *
1491  */
1492 bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
1493 {
1494         struct tcp_sock *tp = tcp_sk(sk);
1495 
1496         if (sysctl_tcp_low_latency || !tp->ucopy.task)
1497                 return false;
1498 
1499         if (skb->len <= tcp_hdrlen(skb) &&
1500             skb_queue_len(&tp->ucopy.prequeue) == 0)
1501                 return false;
1502 
1503         /* Before escaping RCU protected region, we need to take care of skb
1504          * dst. Prequeue is only enabled for established sockets.
1505          * For such sockets, we might need the skb dst only to set sk->sk_rx_dst
1506          * Instead of doing full sk_rx_dst validity here, let's perform
1507          * an optimistic check.
1508          */
1509         if (likely(sk->sk_rx_dst))
1510                 skb_dst_drop(skb);
1511         else
1512                 skb_dst_force_safe(skb);
1513 
1514         __skb_queue_tail(&tp->ucopy.prequeue, skb);
1515         tp->ucopy.memory += skb->truesize;
1516         if (skb_queue_len(&tp->ucopy.prequeue) >= 32 ||
1517             tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
1518                 struct sk_buff *skb1;
1519 
1520                 BUG_ON(sock_owned_by_user(sk));
1521                 __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED,
1522                                 skb_queue_len(&tp->ucopy.prequeue));
1523 
1524                 while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL)
1525                         sk_backlog_rcv(sk, skb1);
1526 
1527                 tp->ucopy.memory = 0;
1528         } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) {
1529                 wake_up_interruptible_sync_poll(sk_sleep(sk),
1530                                            POLLIN | POLLRDNORM | POLLRDBAND);
1531                 if (!inet_csk_ack_scheduled(sk))
1532                         inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
1533                                                   (3 * tcp_rto_min(sk)) / 4,
1534                                                   TCP_RTO_MAX);
1535         }
1536         return true;
1537 }
1538 EXPORT_SYMBOL(tcp_prequeue);
1539 
1540 int tcp_filter(struct sock *sk, struct sk_buff *skb)
1541 {
1542         struct tcphdr *th = (struct tcphdr *)skb->data;
1543         unsigned int eaten = skb->len;
1544         int err;
1545 
1546         err = sk_filter_trim_cap(sk, skb, th->doff * 4);
1547         if (!err) {
1548                 eaten -= skb->len;
1549                 TCP_SKB_CB(skb)->end_seq -= eaten;
1550         }
1551         return err;
1552 }
1553 EXPORT_SYMBOL(tcp_filter);
1554 
1555 /*
1556  *      From tcp_input.c
1557  */
1558 
1559 int tcp_v4_rcv(struct sk_buff *skb)
1560 {
1561         struct net *net = dev_net(skb->dev);
1562         const struct iphdr *iph;
1563         const struct tcphdr *th;
1564         bool refcounted;
1565         struct sock *sk;
1566         int ret;
1567 
1568         if (skb->pkt_type != PACKET_HOST)
1569                 goto discard_it;
1570 
1571         /* Count it even if it's bad */
1572         __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1573 
1574         if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1575                 goto discard_it;
1576 
1577         th = (const struct tcphdr *)skb->data;
1578 
1579         if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
1580                 goto bad_packet;
1581         if (!pskb_may_pull(skb, th->doff * 4))
1582                 goto discard_it;
1583 
1584         /* An explanation is required here, I think.
1585          * Packet length and doff are validated by header prediction,
1586          * provided case of th->doff==0 is eliminated.
1587          * So, we defer the checks. */
1588 
1589         if (skb_checksum_init(skb, IPPROTO_TCP, inet_compute_pseudo))
1590                 goto csum_error;
1591 
1592         th = (const struct tcphdr *)skb->data;
1593         iph = ip_hdr(skb);
1594         /* This is tricky : We move IPCB at its correct location into TCP_SKB_CB()
1595          * barrier() makes sure compiler wont play fool^Waliasing games.
1596          */
1597         memmove(&TCP_SKB_CB(skb)->header.h4, IPCB(skb),
1598                 sizeof(struct inet_skb_parm));
1599         barrier();
1600 
1601         TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1602         TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1603                                     skb->len - th->doff * 4);
1604         TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1605         TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1606         TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1607         TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph);
1608         TCP_SKB_CB(skb)->sacked  = 0;
1609 
1610 lookup:
1611         sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source,
1612                                th->dest, &refcounted);
1613         if (!sk)
1614                 goto no_tcp_socket;
1615 
1616 process:
1617         if (sk->sk_state == TCP_TIME_WAIT)
1618                 goto do_time_wait;
1619 
1620         if (sk->sk_state == TCP_NEW_SYN_RECV) {
1621                 struct request_sock *req = inet_reqsk(sk);
1622                 struct sock *nsk;
1623 
1624                 sk = req->rsk_listener;
1625                 if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) {
1626                         reqsk_put(req);
1627                         goto discard_it;
1628                 }
1629                 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1630                         inet_csk_reqsk_queue_drop_and_put(sk, req);
1631                         goto lookup;
1632                 }
1633                 /* We own a reference on the listener, increase it again
1634                  * as we might lose it too soon.
1635                  */
1636                 sock_hold(sk);
1637                 refcounted = true;
1638                 nsk = tcp_check_req(sk, skb, req, false);
1639                 if (!nsk) {
1640                         reqsk_put(req);
1641                         goto discard_and_relse;
1642                 }
1643                 if (nsk == sk) {
1644                         reqsk_put(req);
1645                 } else if (tcp_child_process(sk, nsk, skb)) {
1646                         tcp_v4_send_reset(nsk, skb);
1647                         goto discard_and_relse;
1648                 } else {
1649                         sock_put(sk);
1650                         return 0;
1651                 }
1652         }
1653         if (unlikely(iph->ttl < inet_sk(sk)->min_ttl)) {
1654                 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1655                 goto discard_and_relse;
1656         }
1657 
1658         if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
1659                 goto discard_and_relse;
1660 
1661         if (tcp_v4_inbound_md5_hash(sk, skb))
1662                 goto discard_and_relse;
1663 
1664         nf_reset(skb);
1665 
1666         if (tcp_filter(sk, skb))
1667                 goto discard_and_relse;
1668         th = (const struct tcphdr *)skb->data;
1669         iph = ip_hdr(skb);
1670 
1671         skb->dev = NULL;
1672 
1673         if (sk->sk_state == TCP_LISTEN) {
1674                 ret = tcp_v4_do_rcv(sk, skb);
1675                 goto put_and_return;
1676         }
1677 
1678         sk_incoming_cpu_update(sk);
1679 
1680         bh_lock_sock_nested(sk);
1681         tcp_segs_in(tcp_sk(sk), skb);
1682         ret = 0;
1683         if (!sock_owned_by_user(sk)) {
1684                 if (!tcp_prequeue(sk, skb))
1685                         ret = tcp_v4_do_rcv(sk, skb);
1686         } else if (unlikely(sk_add_backlog(sk, skb,
1687                                            sk->sk_rcvbuf + sk->sk_sndbuf))) {
1688                 bh_unlock_sock(sk);
1689                 __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP);
1690                 goto discard_and_relse;
1691         }
1692         bh_unlock_sock(sk);
1693 
1694 put_and_return:
1695         if (refcounted)
1696                 sock_put(sk);
1697 
1698         return ret;
1699 
1700 no_tcp_socket:
1701         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
1702                 goto discard_it;
1703 
1704         if (tcp_checksum_complete(skb)) {
1705 csum_error:
1706                 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1707 bad_packet:
1708                 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1709         } else {
1710                 tcp_v4_send_reset(NULL, skb);
1711         }
1712 
1713 discard_it:
1714         /* Discard frame. */
1715         kfree_skb(skb);
1716         return 0;
1717 
1718 discard_and_relse:
1719         sk_drops_add(sk, skb);
1720         if (refcounted)
1721                 sock_put(sk);
1722         goto discard_it;
1723 
1724 do_time_wait:
1725         if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1726                 inet_twsk_put(inet_twsk(sk));
1727                 goto discard_it;
1728         }
1729 
1730         if (tcp_checksum_complete(skb)) {
1731                 inet_twsk_put(inet_twsk(sk));
1732                 goto csum_error;
1733         }
1734         switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1735         case TCP_TW_SYN: {
1736                 struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev),
1737                                                         &tcp_hashinfo, skb,
1738                                                         __tcp_hdrlen(th),
1739                                                         iph->saddr, th->source,
1740                                                         iph->daddr, th->dest,
1741                                                         inet_iif(skb));
1742                 if (sk2) {
1743                         inet_twsk_deschedule_put(inet_twsk(sk));
1744                         sk = sk2;
1745                         refcounted = false;
1746                         goto process;
1747                 }
1748                 /* Fall through to ACK */
1749         }
1750         case TCP_TW_ACK:
1751                 tcp_v4_timewait_ack(sk, skb);
1752                 break;
1753         case TCP_TW_RST:
1754                 tcp_v4_send_reset(sk, skb);
1755                 inet_twsk_deschedule_put(inet_twsk(sk));
1756                 goto discard_it;
1757         case TCP_TW_SUCCESS:;
1758         }
1759         goto discard_it;
1760 }
1761 
1762 static struct timewait_sock_ops tcp_timewait_sock_ops = {
1763         .twsk_obj_size  = sizeof(struct tcp_timewait_sock),
1764         .twsk_unique    = tcp_twsk_unique,
1765         .twsk_destructor= tcp_twsk_destructor,
1766 };
1767 
1768 void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
1769 {
1770         struct dst_entry *dst = skb_dst(skb);
1771 
1772         if (dst && dst_hold_safe(dst)) {
1773                 sk->sk_rx_dst = dst;
1774                 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
1775         }
1776 }
1777 EXPORT_SYMBOL(inet_sk_rx_dst_set);
1778 
1779 const struct inet_connection_sock_af_ops ipv4_specific = {
1780         .queue_xmit        = ip_queue_xmit,
1781         .send_check        = tcp_v4_send_check,
1782         .rebuild_header    = inet_sk_rebuild_header,
1783         .sk_rx_dst_set     = inet_sk_rx_dst_set,
1784         .conn_request      = tcp_v4_conn_request,
1785         .syn_recv_sock     = tcp_v4_syn_recv_sock,
1786         .net_header_len    = sizeof(struct iphdr),
1787         .setsockopt        = ip_setsockopt,
1788         .getsockopt        = ip_getsockopt,
1789         .addr2sockaddr     = inet_csk_addr2sockaddr,
1790         .sockaddr_len      = sizeof(struct sockaddr_in),
1791         .bind_conflict     = inet_csk_bind_conflict,
1792 #ifdef CONFIG_COMPAT
1793         .compat_setsockopt = compat_ip_setsockopt,
1794         .compat_getsockopt = compat_ip_getsockopt,
1795 #endif
1796         .mtu_reduced       = tcp_v4_mtu_reduced,
1797 };
1798 EXPORT_SYMBOL(ipv4_specific);
1799 
1800 #ifdef CONFIG_TCP_MD5SIG
1801 static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = {
1802         .md5_lookup             = tcp_v4_md5_lookup,
1803         .calc_md5_hash          = tcp_v4_md5_hash_skb,
1804         .md5_parse              = tcp_v4_parse_md5_keys,
1805 };
1806 #endif
1807 
1808 /* NOTE: A lot of things set to zero explicitly by call to
1809  *       sk_alloc() so need not be done here.
1810  */
1811 static int tcp_v4_init_sock(struct sock *sk)
1812 {
1813         struct inet_connection_sock *icsk = inet_csk(sk);
1814 
1815         tcp_init_sock(sk);
1816 
1817         icsk->icsk_af_ops = &ipv4_specific;
1818 
1819 #ifdef CONFIG_TCP_MD5SIG
1820         tcp_sk(sk)->af_specific = &tcp_sock_ipv4_specific;
1821 #endif
1822 
1823         return 0;
1824 }
1825 
1826 void tcp_v4_destroy_sock(struct sock *sk)
1827 {
1828         struct tcp_sock *tp = tcp_sk(sk);
1829 
1830         tcp_clear_xmit_timers(sk);
1831 
1832         tcp_cleanup_congestion_control(sk);
1833 
1834         /* Cleanup up the write buffer. */
1835         tcp_write_queue_purge(sk);
1836 
1837         /* Cleans up our, hopefully empty, out_of_order_queue. */
1838         __skb_queue_purge(&tp->out_of_order_queue);
1839 
1840 #ifdef CONFIG_TCP_MD5SIG
1841         /* Clean up the MD5 key list, if any */
1842         if (tp->md5sig_info) {
1843                 tcp_clear_md5_list(sk);
1844                 kfree_rcu(tp->md5sig_info, rcu);
1845                 tp->md5sig_info = NULL;
1846         }
1847 #endif
1848 
1849         /* Clean prequeue, it must be empty really */
1850         __skb_queue_purge(&tp->ucopy.prequeue);
1851 
1852         /* Clean up a referenced TCP bind bucket. */
1853         if (inet_csk(sk)->icsk_bind_hash)
1854                 inet_put_port(sk);
1855 
1856         BUG_ON(tp->fastopen_rsk);
1857 
1858         /* If socket is aborted during connect operation */
1859         tcp_free_fastopen_req(tp);
1860         tcp_saved_syn_free(tp);
1861 
1862         local_bh_disable();
1863         sk_sockets_allocated_dec(sk);
1864         local_bh_enable();
1865 
1866         if (mem_cgroup_sockets_enabled && sk->sk_memcg)
1867                 sock_release_memcg(sk);
1868 }
1869 EXPORT_SYMBOL(tcp_v4_destroy_sock);
1870 
1871 #ifdef CONFIG_PROC_FS
1872 /* Proc filesystem TCP sock list dumping. */
1873 
1874 /*
1875  * Get next listener socket follow cur.  If cur is NULL, get first socket
1876  * starting from bucket given in st->bucket; when st->bucket is zero the
1877  * very first socket in the hash table is returned.
1878  */
1879 static void *listening_get_next(struct seq_file *seq, void *cur)
1880 {
1881         struct tcp_iter_state *st = seq->private;
1882         struct net *net = seq_file_net(seq);
1883         struct inet_listen_hashbucket *ilb;
1884         struct inet_connection_sock *icsk;
1885         struct sock *sk = cur;
1886 
1887         if (!sk) {
1888 get_head:
1889                 ilb = &tcp_hashinfo.listening_hash[st->bucket];
1890                 spin_lock_bh(&ilb->lock);
1891                 sk = sk_head(&ilb->head);
1892                 st->offset = 0;
1893                 goto get_sk;
1894         }
1895         ilb = &tcp_hashinfo.listening_hash[st->bucket];
1896         ++st->num;
1897         ++st->offset;
1898 
1899         sk = sk_next(sk);
1900 get_sk:
1901         sk_for_each_from(sk) {
1902                 if (!net_eq(sock_net(sk), net))
1903                         continue;
1904                 if (sk->sk_family == st->family)
1905                         return sk;
1906                 icsk = inet_csk(sk);
1907         }
1908         spin_unlock_bh(&ilb->lock);
1909         st->offset = 0;
1910         if (++st->bucket < INET_LHTABLE_SIZE)
1911                 goto get_head;
1912         return NULL;
1913 }
1914 
1915 static void *listening_get_idx(struct seq_file *seq, loff_t *pos)
1916 {
1917         struct tcp_iter_state *st = seq->private;
1918         void *rc;
1919 
1920         st->bucket = 0;
1921         st->offset = 0;
1922         rc = listening_get_next(seq, NULL);
1923 
1924         while (rc && *pos) {
1925                 rc = listening_get_next(seq, rc);
1926                 --*pos;
1927         }
1928         return rc;
1929 }
1930 
1931 static inline bool empty_bucket(const struct tcp_iter_state *st)
1932 {
1933         return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain);
1934 }
1935 
1936 /*
1937  * Get first established socket starting from bucket given in st->bucket.
1938  * If st->bucket is zero, the very first socket in the hash is returned.
1939  */
1940 static void *established_get_first(struct seq_file *seq)
1941 {
1942         struct tcp_iter_state *st = seq->private;
1943         struct net *net = seq_file_net(seq);
1944         void *rc = NULL;
1945 
1946         st->offset = 0;
1947         for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) {
1948                 struct sock *sk;
1949                 struct hlist_nulls_node *node;
1950                 spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
1951 
1952                 /* Lockless fast path for the common case of empty buckets */
1953                 if (empty_bucket(st))
1954                         continue;
1955 
1956                 spin_lock_bh(lock);
1957                 sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
1958                         if (sk->sk_family != st->family ||
1959                             !net_eq(sock_net(sk), net)) {
1960                                 continue;
1961                         }
1962                         rc = sk;
1963                         goto out;
1964                 }
1965                 spin_unlock_bh(lock);
1966         }
1967 out:
1968         return rc;
1969 }
1970 
1971 static void *established_get_next(struct seq_file *seq, void *cur)
1972 {
1973         struct sock *sk = cur;
1974         struct hlist_nulls_node *node;
1975         struct tcp_iter_state *st = seq->private;
1976         struct net *net = seq_file_net(seq);
1977 
1978         ++st->num;
1979         ++st->offset;
1980 
1981         sk = sk_nulls_next(sk);
1982 
1983         sk_nulls_for_each_from(sk, node) {
1984                 if (sk->sk_family == st->family && net_eq(sock_net(sk), net))
1985                         return sk;
1986         }
1987 
1988         spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
1989         ++st->bucket;
1990         return established_get_first(seq);
1991 }
1992 
1993 static void *established_get_idx(struct seq_file *seq, loff_t pos)
1994 {
1995         struct tcp_iter_state *st = seq->private;
1996         void *rc;
1997 
1998         st->bucket = 0;
1999         rc = established_get_first(seq);
2000 
2001         while (rc && pos) {
2002                 rc = established_get_next(seq, rc);
2003                 --pos;
2004         }
2005         return rc;
2006 }
2007 
2008 static void *tcp_get_idx(struct seq_file *seq, loff_t pos)
2009 {
2010         void *rc;
2011         struct tcp_iter_state *st = seq->private;
2012 
2013         st->state = TCP_SEQ_STATE_LISTENING;
2014         rc        = listening_get_idx(seq, &pos);
2015 
2016         if (!rc) {
2017                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2018                 rc        = established_get_idx(seq, pos);
2019         }
2020 
2021         return rc;
2022 }
2023 
2024 static void *tcp_seek_last_pos(struct seq_file *seq)
2025 {
2026         struct tcp_iter_state *st = seq->private;
2027         int offset = st->offset;
2028         int orig_num = st->num;
2029         void *rc = NULL;
2030 
2031         switch (st->state) {
2032         case TCP_SEQ_STATE_LISTENING:
2033                 if (st->bucket >= INET_LHTABLE_SIZE)
2034                         break;
2035                 st->state = TCP_SEQ_STATE_LISTENING;
2036                 rc = listening_get_next(seq, NULL);
2037                 while (offset-- && rc)
2038                         rc = listening_get_next(seq, rc);
2039                 if (rc)
2040                         break;
2041                 st->bucket = 0;
2042                 st->state = TCP_SEQ_STATE_ESTABLISHED;
2043                 /* Fallthrough */
2044         case TCP_SEQ_STATE_ESTABLISHED:
2045                 if (st->bucket > tcp_hashinfo.ehash_mask)
2046                         break;
2047                 rc = established_get_first(seq);
2048                 while (offset-- && rc)
2049                         rc = established_get_next(seq, rc);
2050         }
2051 
2052         st->num = orig_num;
2053 
2054         return rc;
2055 }
2056 
2057 static void *tcp_seq_start(struct seq_file *seq, loff_t *pos)
2058 {
2059         struct tcp_iter_state *st = seq->private;
2060         void *rc;
2061 
2062         if (*pos && *pos == st->last_pos) {
2063                 rc = tcp_seek_last_pos(seq);
2064                 if (rc)
2065                         goto out;
2066         }
2067 
2068         st->state = TCP_SEQ_STATE_LISTENING;
2069         st->num = 0;
2070         st->bucket = 0;
2071         st->offset = 0;
2072         rc = *pos ? tcp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2073 
2074 out:
2075         st->last_pos = *pos;
2076         return rc;
2077 }
2078 
2079 static void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2080 {
2081         struct tcp_iter_state *st = seq->private;
2082         void *rc = NULL;
2083 
2084         if (v == SEQ_START_TOKEN) {
2085                 rc = tcp_get_idx(seq, 0);
2086                 goto out;
2087         }
2088 
2089         switch (st->state) {
2090         case TCP_SEQ_STATE_LISTENING:
2091                 rc = listening_get_next(seq, v);
2092                 if (!rc) {
2093                         st->state = TCP_SEQ_STATE_ESTABLISHED;
2094                         st->bucket = 0;
2095                         st->offset = 0;
2096                         rc        = established_get_first(seq);
2097                 }
2098                 break;
2099         case TCP_SEQ_STATE_ESTABLISHED:
2100                 rc = established_get_next(seq, v);
2101                 break;
2102         }
2103 out:
2104         ++*pos;
2105         st->last_pos = *pos;
2106         return rc;
2107 }
2108 
2109 static void tcp_seq_stop(struct seq_file *seq, void *v)
2110 {
2111         struct tcp_iter_state *st = seq->private;
2112 
2113         switch (st->state) {
2114         case TCP_SEQ_STATE_LISTENING:
2115                 if (v != SEQ_START_TOKEN)
2116                         spin_unlock_bh(&tcp_hashinfo.listening_hash[st->bucket].lock);
2117                 break;
2118         case TCP_SEQ_STATE_ESTABLISHED:
2119                 if (v)
2120                         spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
2121                 break;
2122         }
2123 }
2124 
2125 int tcp_seq_open(struct inode *inode, struct file *file)
2126 {
2127         struct tcp_seq_afinfo *afinfo = PDE_DATA(inode);
2128         struct tcp_iter_state *s;
2129         int err;
2130 
2131         err = seq_open_net(inode, file, &afinfo->seq_ops,
2132                           sizeof(struct tcp_iter_state));
2133         if (err < 0)
2134                 return err;
2135 
2136         s = ((struct seq_file *)file->private_data)->private;
2137         s->family               = afinfo->family;
2138         s->last_pos             = 0;
2139         return 0;
2140 }
2141 EXPORT_SYMBOL(tcp_seq_open);
2142 
2143 int tcp_proc_register(struct net *net, struct tcp_seq_afinfo *afinfo)
2144 {
2145         int rc = 0;
2146         struct proc_dir_entry *p;
2147 
2148         afinfo->seq_ops.start           = tcp_seq_start;
2149         afinfo->seq_ops.next            = tcp_seq_next;
2150         afinfo->seq_ops.stop            = tcp_seq_stop;
2151 
2152         p = proc_create_data(afinfo->name, S_IRUGO, net->proc_net,
2153                              afinfo->seq_fops, afinfo);
2154         if (!p)
2155                 rc = -ENOMEM;
2156         return rc;
2157 }
2158 EXPORT_SYMBOL(tcp_proc_register);
2159 
2160 void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo)
2161 {
2162         remove_proc_entry(afinfo->name, net->proc_net);
2163 }
2164 EXPORT_SYMBOL(tcp_proc_unregister);
2165 
2166 static void get_openreq4(const struct request_sock *req,
2167                          struct seq_file *f, int i)
2168 {
2169         const struct inet_request_sock *ireq = inet_rsk(req);
2170         long delta = req->rsk_timer.expires - jiffies;
2171 
2172         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2173                 " %02X %08X:%08X %02X:%08lX %08X %5u %8d %u %d %pK",
2174                 i,
2175                 ireq->ir_loc_addr,
2176                 ireq->ir_num,
2177                 ireq->ir_rmt_addr,
2178                 ntohs(ireq->ir_rmt_port),
2179                 TCP_SYN_RECV,
2180                 0, 0, /* could print option size, but that is af dependent. */
2181                 1,    /* timers active (only the expire timer) */
2182                 jiffies_delta_to_clock_t(delta),
2183                 req->num_timeout,
2184                 from_kuid_munged(seq_user_ns(f),
2185                                  sock_i_uid(req->rsk_listener)),
2186                 0,  /* non standard timer */
2187                 0, /* open_requests have no inode */
2188                 0,
2189                 req);
2190 }
2191 
2192 static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
2193 {
2194         int timer_active;
2195         unsigned long timer_expires;
2196         const struct tcp_sock *tp = tcp_sk(sk);
2197         const struct inet_connection_sock *icsk = inet_csk(sk);
2198         const struct inet_sock *inet = inet_sk(sk);
2199         const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
2200         __be32 dest = inet->inet_daddr;
2201         __be32 src = inet->inet_rcv_saddr;
2202         __u16 destp = ntohs(inet->inet_dport);
2203         __u16 srcp = ntohs(inet->inet_sport);
2204         int rx_queue;
2205         int state;
2206 
2207         if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
2208             icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
2209             icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
2210                 timer_active    = 1;
2211                 timer_expires   = icsk->icsk_timeout;
2212         } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2213                 timer_active    = 4;
2214                 timer_expires   = icsk->icsk_timeout;
2215         } else if (timer_pending(&sk->sk_timer)) {
2216                 timer_active    = 2;
2217                 timer_expires   = sk->sk_timer.expires;
2218         } else {
2219                 timer_active    = 0;
2220                 timer_expires = jiffies;
2221         }
2222 
2223         state = sk_state_load(sk);
2224         if (state == TCP_LISTEN)
2225                 rx_queue = sk->sk_ack_backlog;
2226         else
2227                 /* Because we don't lock the socket,
2228                  * we might find a transient negative value.
2229                  */
2230                 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
2231 
2232         seq_printf(f, "%4d: %08X:%04X %08X:%04X %02X %08X:%08X %02X:%08lX "
2233                         "%08X %5u %8d %lu %d %pK %lu %lu %u %u %d",
2234                 i, src, srcp, dest, destp, state,
2235                 tp->write_seq - tp->snd_una,
2236                 rx_queue,
2237                 timer_active,
2238                 jiffies_delta_to_clock_t(timer_expires - jiffies),
2239                 icsk->icsk_retransmits,
2240                 from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)),
2241                 icsk->icsk_probes_out,
2242                 sock_i_ino(sk),
2243                 atomic_read(&sk->sk_refcnt), sk,
2244                 jiffies_to_clock_t(icsk->icsk_rto),
2245                 jiffies_to_clock_t(icsk->icsk_ack.ato),
2246                 (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
2247                 tp->snd_cwnd,
2248                 state == TCP_LISTEN ?
2249                     fastopenq->max_qlen :
2250                     (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh));
2251 }
2252 
2253 static void get_timewait4_sock(const struct inet_timewait_sock *tw,
2254                                struct seq_file *f, int i)
2255 {
2256         long delta = tw->tw_timer.expires - jiffies;
2257         __be32 dest, src;
2258         __u16 destp, srcp;
2259 
2260         dest  = tw->tw_daddr;
2261         src   = tw->tw_rcv_saddr;
2262         destp = ntohs(tw->tw_dport);
2263         srcp  = ntohs(tw->tw_sport);
2264 
2265         seq_printf(f, "%4d: %08X:%04X %08X:%04X"
2266                 " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK",
2267                 i, src, srcp, dest, destp, tw->tw_substate, 0, 0,
2268                 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
2269                 atomic_read(&tw->tw_refcnt), tw);
2270 }
2271 
2272 #define TMPSZ 150
2273 
2274 static int tcp4_seq_show(struct seq_file *seq, void *v)
2275 {
2276         struct tcp_iter_state *st;
2277         struct sock *sk = v;
2278 
2279         seq_setwidth(seq, TMPSZ - 1);
2280         if (v == SEQ_START_TOKEN) {
2281                 seq_puts(seq, "  sl  local_address rem_address   st tx_queue "
2282                            "rx_queue tr tm->when retrnsmt   uid  timeout "
2283                            "inode");
2284                 goto out;
2285         }
2286         st = seq->private;
2287 
2288         if (sk->sk_state == TCP_TIME_WAIT)
2289                 get_timewait4_sock(v, seq, st->num);
2290         else if (sk->sk_state == TCP_NEW_SYN_RECV)
2291                 get_openreq4(v, seq, st->num);
2292         else
2293                 get_tcp4_sock(v, seq, st->num);
2294 out:
2295         seq_pad(seq, '\n');
2296         return 0;
2297 }
2298 
2299 static const struct file_operations tcp_afinfo_seq_fops = {
2300         .owner   = THIS_MODULE,
2301         .open    = tcp_seq_open,
2302         .read    = seq_read,
2303         .llseek  = seq_lseek,
2304         .release = seq_release_net
2305 };
2306 
2307 static struct tcp_seq_afinfo tcp4_seq_afinfo = {
2308         .name           = "tcp",
2309         .family         = AF_INET,
2310         .seq_fops       = &tcp_afinfo_seq_fops,
2311         .seq_ops        = {
2312                 .show           = tcp4_seq_show,
2313         },
2314 };
2315 
2316 static int __net_init tcp4_proc_init_net(struct net *net)
2317 {
2318         return tcp_proc_register(net, &tcp4_seq_afinfo);
2319 }
2320 
2321 static void __net_exit tcp4_proc_exit_net(struct net *net)
2322 {
2323         tcp_proc_unregister(net, &tcp4_seq_afinfo);
2324 }
2325 
2326 static struct pernet_operations tcp4_net_ops = {
2327         .init = tcp4_proc_init_net,
2328         .exit = tcp4_proc_exit_net,
2329 };
2330 
2331 int __init tcp4_proc_init(void)
2332 {
2333         return register_pernet_subsys(&tcp4_net_ops);
2334 }
2335 
2336 void tcp4_proc_exit(void)
2337 {
2338         unregister_pernet_subsys(&tcp4_net_ops);
2339 }
2340 #endif /* CONFIG_PROC_FS */
2341 
2342 struct proto tcp_prot = {
2343         .name                   = "TCP",
2344         .owner                  = THIS_MODULE,
2345         .close                  = tcp_close,
2346         .connect                = tcp_v4_connect,
2347         .disconnect             = tcp_disconnect,
2348         .accept                 = inet_csk_accept,
2349         .ioctl                  = tcp_ioctl,
2350         .init                   = tcp_v4_init_sock,
2351         .destroy                = tcp_v4_destroy_sock,
2352         .shutdown               = tcp_shutdown,
2353         .setsockopt             = tcp_setsockopt,
2354         .getsockopt             = tcp_getsockopt,
2355         .recvmsg                = tcp_recvmsg,
2356         .sendmsg                = tcp_sendmsg,
2357         .sendpage               = tcp_sendpage,
2358         .backlog_rcv            = tcp_v4_do_rcv,
2359         .release_cb             = tcp_release_cb,
2360         .hash                   = inet_hash,
2361         .unhash                 = inet_unhash,
2362         .get_port               = inet_csk_get_port,
2363         .enter_memory_pressure  = tcp_enter_memory_pressure,
2364         .stream_memory_free     = tcp_stream_memory_free,
2365         .sockets_allocated      = &tcp_sockets_allocated,
2366         .orphan_count           = &tcp_orphan_count,
2367         .memory_allocated       = &tcp_memory_allocated,
2368         .memory_pressure        = &tcp_memory_pressure,
2369         .sysctl_mem             = sysctl_tcp_mem,
2370         .sysctl_wmem            = sysctl_tcp_wmem,
2371         .sysctl_rmem            = sysctl_tcp_rmem,
2372         .max_header             = MAX_TCP_HEADER,
2373         .obj_size               = sizeof(struct tcp_sock),
2374         .slab_flags             = SLAB_DESTROY_BY_RCU,
2375         .twsk_prot              = &tcp_timewait_sock_ops,
2376         .rsk_prot               = &tcp_request_sock_ops,
2377         .h.hashinfo             = &tcp_hashinfo,
2378         .no_autobind            = true,
2379 #ifdef CONFIG_COMPAT
2380         .compat_setsockopt      = compat_tcp_setsockopt,
2381         .compat_getsockopt      = compat_tcp_getsockopt,
2382 #endif
2383         .diag_destroy           = tcp_abort,
2384 };
2385 EXPORT_SYMBOL(tcp_prot);
2386 
2387 static void __net_exit tcp_sk_exit(struct net *net)
2388 {
2389         int cpu;
2390 
2391         for_each_possible_cpu(cpu)
2392                 inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu));
2393         free_percpu(net->ipv4.tcp_sk);
2394 }
2395 
2396 static int __net_init tcp_sk_init(struct net *net)
2397 {
2398         int res, cpu;
2399 
2400         net->ipv4.tcp_sk = alloc_percpu(struct sock *);
2401         if (!net->ipv4.tcp_sk)
2402                 return -ENOMEM;
2403 
2404         for_each_possible_cpu(cpu) {
2405                 struct sock *sk;
2406 
2407                 res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW,
2408                                            IPPROTO_TCP, net);
2409                 if (res)
2410                         goto fail;
2411                 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2412                 *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
2413         }
2414 
2415         net->ipv4.sysctl_tcp_ecn = 2;
2416         net->ipv4.sysctl_tcp_ecn_fallback = 1;
2417 
2418         net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
2419         net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD;
2420         net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL;
2421 
2422         net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
2423         net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
2424         net->ipv4.sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL;
2425 
2426         net->ipv4.sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
2427         net->ipv4.sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES;
2428         net->ipv4.sysctl_tcp_syncookies = 1;
2429         net->ipv4.sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH;
2430         net->ipv4.sysctl_tcp_retries1 = TCP_RETR1;
2431         net->ipv4.sysctl_tcp_retries2 = TCP_RETR2;
2432         net->ipv4.sysctl_tcp_orphan_retries = 0;
2433         net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
2434         net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX;
2435 
2436         return 0;
2437 fail:
2438         tcp_sk_exit(net);
2439 
2440         return res;
2441 }
2442 
2443 static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list)
2444 {
2445         inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET);
2446 }
2447 
2448 static struct pernet_operations __net_initdata tcp_sk_ops = {
2449        .init       = tcp_sk_init,
2450        .exit       = tcp_sk_exit,
2451        .exit_batch = tcp_sk_exit_batch,
2452 };
2453 
2454 void __init tcp_v4_init(void)
2455 {
2456         inet_hashinfo_init(&tcp_hashinfo);
2457         if (register_pernet_subsys(&tcp_sk_ops))
2458                 panic("Failed to create the TCP control socket.\n");
2459 }
2460 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp