1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * TCP over IPv6 4 * Linux INET6 implementation 5 * 6 * Authors: 7 * Pedro Roque <roque@di.fc.ul.pt> 8 * 9 * Based on: 10 * linux/net/ipv4/tcp.c 11 * linux/net/ipv4/tcp_input.c 12 * linux/net/ipv4/tcp_output.c 13 * 14 * Fixes: 15 * Hideaki YOSHIFUJI : sin6_scope_id support 16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which 17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind 18 * a single port at the same time. 19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. 20 */ 21 22 #include <linux/bottom_half.h> 23 #include <linux/module.h> 24 #include <linux/errno.h> 25 #include <linux/types.h> 26 #include <linux/socket.h> 27 #include <linux/sockios.h> 28 #include <linux/net.h> 29 #include <linux/jiffies.h> 30 #include <linux/in.h> 31 #include <linux/in6.h> 32 #include <linux/netdevice.h> 33 #include <linux/init.h> 34 #include <linux/jhash.h> 35 #include <linux/ipsec.h> 36 #include <linux/times.h> 37 #include <linux/slab.h> 38 #include <linux/uaccess.h> 39 #include <linux/ipv6.h> 40 #include <linux/icmpv6.h> 41 #include <linux/random.h> 42 #include <linux/indirect_call_wrapper.h> 43 44 #include <net/tcp.h> 45 #include <net/ndisc.h> 46 #include <net/inet6_hashtables.h> 47 #include <net/inet6_connection_sock.h> 48 #include <net/ipv6.h> 49 #include <net/transp_v6.h> 50 #include <net/addrconf.h> 51 #include <net/ip6_route.h> 52 #include <net/ip6_checksum.h> 53 #include <net/inet_ecn.h> 54 #include <net/protocol.h> 55 #include <net/xfrm.h> 56 #include <net/snmp.h> 57 #include <net/dsfield.h> 58 #include <net/timewait_sock.h> 59 #include <net/inet_common.h> 60 #include <net/secure_seq.h> 61 #include <net/busy_poll.h> 62 63 #include <linux/proc_fs.h> 64 #include <linux/seq_file.h> 65 66 #include <crypto/hash.h> 67 #include <linux/scatterlist.h> 68 69 #include <trace/events/tcp.h> 70 71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); 72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 73 struct request_sock *req); 74 75 INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); 76 77 static const struct inet_connection_sock_af_ops ipv6_mapped; 78 const struct inet_connection_sock_af_ops ipv6_specific; 79 #ifdef CONFIG_TCP_MD5SIG 80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; 81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; 82 #else 83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 84 const struct in6_addr *addr, 85 int l3index) 86 { 87 return NULL; 88 } 89 #endif 90 91 /* Helper returning the inet6 address from a given tcp socket. 92 * It can be used in TCP stack instead of inet6_sk(sk). 93 * This avoids a dereference and allow compiler optimizations. 94 * It is a specialized version of inet6_sk_generic(). 
95 */ 96 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk) 97 { 98 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo); 99 100 return (struct ipv6_pinfo *)(((u8 *)sk) + offset); 101 } 102 103 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) 104 { 105 struct dst_entry *dst = skb_dst(skb); 106 107 if (dst && dst_hold_safe(dst)) { 108 const struct rt6_info *rt = (const struct rt6_info *)dst; 109 110 rcu_assign_pointer(sk->sk_rx_dst, dst); 111 sk->sk_rx_dst_ifindex = skb->skb_iif; 112 sk->sk_rx_dst_cookie = rt6_get_cookie(rt); 113 } 114 } 115 116 static u32 tcp_v6_init_seq(const struct sk_buff *skb) 117 { 118 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, 119 ipv6_hdr(skb)->saddr.s6_addr32, 120 tcp_hdr(skb)->dest, 121 tcp_hdr(skb)->source); 122 } 123 124 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) 125 { 126 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, 127 ipv6_hdr(skb)->saddr.s6_addr32); 128 } 129 130 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, 131 int addr_len) 132 { 133 /* This check is replicated from tcp_v6_connect() and intended to 134 * prevent BPF program called below from accessing bytes that are out 135 * of the bound specified by user in addr_len. 136 */ 137 if (addr_len < SIN6_LEN_RFC2133) 138 return -EINVAL; 139 140 sock_owned_by_me(sk); 141 142 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); 143 } 144 145 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 146 int addr_len) 147 { 148 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; 149 struct inet_sock *inet = inet_sk(sk); 150 struct inet_connection_sock *icsk = inet_csk(sk); 151 struct inet_timewait_death_row *tcp_death_row; 152 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 153 struct tcp_sock *tp = tcp_sk(sk); 154 struct in6_addr *saddr = NULL, *final_p, final; 155 struct ipv6_txoptions *opt; 156 struct flowi6 fl6; 157 struct dst_entry *dst; 158 int addr_type; 159 int err; 160 161 if (addr_len < SIN6_LEN_RFC2133) 162 return -EINVAL; 163 164 if (usin->sin6_family != AF_INET6) 165 return -EAFNOSUPPORT; 166 167 memset(&fl6, 0, sizeof(fl6)); 168 169 if (np->sndflow) { 170 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; 171 IP6_ECN_flow_init(fl6.flowlabel); 172 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { 173 struct ip6_flowlabel *flowlabel; 174 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); 175 if (IS_ERR(flowlabel)) 176 return -EINVAL; 177 fl6_sock_release(flowlabel); 178 } 179 } 180 181 /* 182 * connect() to INADDR_ANY means loopback (BSD'ism). 183 */ 184 185 if (ipv6_addr_any(&usin->sin6_addr)) { 186 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) 187 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), 188 &usin->sin6_addr); 189 else 190 usin->sin6_addr = in6addr_loopback; 191 } 192 193 addr_type = ipv6_addr_type(&usin->sin6_addr); 194 195 if (addr_type & IPV6_ADDR_MULTICAST) 196 return -ENETUNREACH; 197 198 if (addr_type&IPV6_ADDR_LINKLOCAL) { 199 if (addr_len >= sizeof(struct sockaddr_in6) && 200 usin->sin6_scope_id) { 201 /* If interface is set while binding, indices 202 * must coincide. 
203 */ 204 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) 205 return -EINVAL; 206 207 sk->sk_bound_dev_if = usin->sin6_scope_id; 208 } 209 210 /* Connect to link-local address requires an interface */ 211 if (!sk->sk_bound_dev_if) 212 return -EINVAL; 213 } 214 215 if (tp->rx_opt.ts_recent_stamp && 216 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { 217 tp->rx_opt.ts_recent = 0; 218 tp->rx_opt.ts_recent_stamp = 0; 219 WRITE_ONCE(tp->write_seq, 0); 220 } 221 222 sk->sk_v6_daddr = usin->sin6_addr; 223 np->flow_label = fl6.flowlabel; 224 225 /* 226 * TCP over IPv4 227 */ 228 229 if (addr_type & IPV6_ADDR_MAPPED) { 230 u32 exthdrlen = icsk->icsk_ext_hdr_len; 231 struct sockaddr_in sin; 232 233 if (ipv6_only_sock(sk)) 234 return -ENETUNREACH; 235 236 sin.sin_family = AF_INET; 237 sin.sin_port = usin->sin6_port; 238 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; 239 240 icsk->icsk_af_ops = &ipv6_mapped; 241 if (sk_is_mptcp(sk)) 242 mptcpv6_handle_mapped(sk, true); 243 sk->sk_backlog_rcv = tcp_v4_do_rcv; 244 #ifdef CONFIG_TCP_MD5SIG 245 tp->af_specific = &tcp_sock_ipv6_mapped_specific; 246 #endif 247 248 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); 249 250 if (err) { 251 icsk->icsk_ext_hdr_len = exthdrlen; 252 icsk->icsk_af_ops = &ipv6_specific; 253 if (sk_is_mptcp(sk)) 254 mptcpv6_handle_mapped(sk, false); 255 sk->sk_backlog_rcv = tcp_v6_do_rcv; 256 #ifdef CONFIG_TCP_MD5SIG 257 tp->af_specific = &tcp_sock_ipv6_specific; 258 #endif 259 goto failure; 260 } 261 np->saddr = sk->sk_v6_rcv_saddr; 262 263 return err; 264 } 265 266 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) 267 saddr = &sk->sk_v6_rcv_saddr; 268 269 fl6.flowi6_proto = IPPROTO_TCP; 270 fl6.daddr = sk->sk_v6_daddr; 271 fl6.saddr = saddr ? *saddr : np->saddr; 272 fl6.flowi6_oif = sk->sk_bound_dev_if; 273 fl6.flowi6_mark = sk->sk_mark; 274 fl6.fl6_dport = usin->sin6_port; 275 fl6.fl6_sport = inet->inet_sport; 276 fl6.flowi6_uid = sk->sk_uid; 277 278 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); 279 final_p = fl6_update_dst(&fl6, opt, &final); 280 281 security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); 282 283 dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); 284 if (IS_ERR(dst)) { 285 err = PTR_ERR(dst); 286 goto failure; 287 } 288 289 if (!saddr) { 290 saddr = &fl6.saddr; 291 sk->sk_v6_rcv_saddr = *saddr; 292 } 293 294 /* set the source address */ 295 np->saddr = *saddr; 296 inet->inet_rcv_saddr = LOOPBACK4_IPV6; 297 298 sk->sk_gso_type = SKB_GSO_TCPV6; 299 ip6_dst_store(sk, dst, NULL, NULL); 300 301 icsk->icsk_ext_hdr_len = 0; 302 if (opt) 303 icsk->icsk_ext_hdr_len = opt->opt_flen + 304 opt->opt_nflen; 305 306 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); 307 308 inet->inet_dport = usin->sin6_port; 309 310 tcp_set_state(sk, TCP_SYN_SENT); 311 tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; 312 err = inet6_hash_connect(tcp_death_row, sk); 313 if (err) 314 goto late_failure; 315 316 sk_set_txhash(sk); 317 318 if (likely(!tp->repair)) { 319 if (!tp->write_seq) 320 WRITE_ONCE(tp->write_seq, 321 secure_tcpv6_seq(np->saddr.s6_addr32, 322 sk->sk_v6_daddr.s6_addr32, 323 inet->inet_sport, 324 inet->inet_dport)); 325 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), 326 np->saddr.s6_addr32, 327 sk->sk_v6_daddr.s6_addr32); 328 } 329 330 if (tcp_fastopen_defer_connect(sk, &err)) 331 return err; 332 if (err) 333 goto late_failure; 334 335 err = tcp_connect(sk); 336 if (err) 337 goto late_failure; 338 339 return 0; 340 341 late_failure: 
342 tcp_set_state(sk, TCP_CLOSE); 343 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) 344 inet_reset_saddr(sk); 345 failure: 346 inet->inet_dport = 0; 347 sk->sk_route_caps = 0; 348 return err; 349 } 350 351 static void tcp_v6_mtu_reduced(struct sock *sk) 352 { 353 struct dst_entry *dst; 354 u32 mtu; 355 356 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) 357 return; 358 359 mtu = READ_ONCE(tcp_sk(sk)->mtu_info); 360 361 /* Drop requests trying to increase our current mss. 362 * Check done in __ip6_rt_update_pmtu() is too late. 363 */ 364 if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) 365 return; 366 367 dst = inet6_csk_update_pmtu(sk, mtu); 368 if (!dst) 369 return; 370 371 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { 372 tcp_sync_mss(sk, dst_mtu(dst)); 373 tcp_simple_retransmit(sk); 374 } 375 } 376 377 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 378 u8 type, u8 code, int offset, __be32 info) 379 { 380 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; 381 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); 382 struct net *net = dev_net(skb->dev); 383 struct request_sock *fastopen; 384 struct ipv6_pinfo *np; 385 struct tcp_sock *tp; 386 __u32 seq, snd_una; 387 struct sock *sk; 388 bool fatal; 389 int err; 390 391 sk = __inet6_lookup_established(net, &tcp_hashinfo, 392 &hdr->daddr, th->dest, 393 &hdr->saddr, ntohs(th->source), 394 skb->dev->ifindex, inet6_sdif(skb)); 395 396 if (!sk) { 397 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), 398 ICMP6_MIB_INERRORS); 399 return -ENOENT; 400 } 401 402 if (sk->sk_state == TCP_TIME_WAIT) { 403 inet_twsk_put(inet_twsk(sk)); 404 return 0; 405 } 406 seq = ntohl(th->seq); 407 fatal = icmpv6_err_convert(type, code, &err); 408 if (sk->sk_state == TCP_NEW_SYN_RECV) { 409 tcp_req_err(sk, seq, fatal); 410 return 0; 411 } 412 413 bh_lock_sock(sk); 414 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) 415 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); 416 417 if (sk->sk_state == TCP_CLOSE) 418 goto out; 419 420 if (static_branch_unlikely(&ip6_min_hopcount)) { 421 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 422 if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 423 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 424 goto out; 425 } 426 } 427 428 tp = tcp_sk(sk); 429 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ 430 fastopen = rcu_dereference(tp->fastopen_rsk); 431 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; 432 if (sk->sk_state != TCP_LISTEN && 433 !between(seq, snd_una, tp->snd_nxt)) { 434 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); 435 goto out; 436 } 437 438 np = tcp_inet6_sk(sk); 439 440 if (type == NDISC_REDIRECT) { 441 if (!sock_owned_by_user(sk)) { 442 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); 443 444 if (dst) 445 dst->ops->redirect(dst, sk, skb); 446 } 447 goto out; 448 } 449 450 if (type == ICMPV6_PKT_TOOBIG) { 451 u32 mtu = ntohl(info); 452 453 /* We are not interested in TCP_LISTEN and open_requests 454 * (SYN-ACKs send out by Linux are always <576bytes so 455 * they should go through unfragmented). 
456 */ 457 if (sk->sk_state == TCP_LISTEN) 458 goto out; 459 460 if (!ip6_sk_accept_pmtu(sk)) 461 goto out; 462 463 if (mtu < IPV6_MIN_MTU) 464 goto out; 465 466 WRITE_ONCE(tp->mtu_info, mtu); 467 468 if (!sock_owned_by_user(sk)) 469 tcp_v6_mtu_reduced(sk); 470 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, 471 &sk->sk_tsq_flags)) 472 sock_hold(sk); 473 goto out; 474 } 475 476 477 /* Might be for an request_sock */ 478 switch (sk->sk_state) { 479 case TCP_SYN_SENT: 480 case TCP_SYN_RECV: 481 /* Only in fast or simultaneous open. If a fast open socket is 482 * already accepted it is treated as a connected one below. 483 */ 484 if (fastopen && !fastopen->sk) 485 break; 486 487 ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); 488 489 if (!sock_owned_by_user(sk)) { 490 sk->sk_err = err; 491 sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ 492 493 tcp_done(sk); 494 } else 495 sk->sk_err_soft = err; 496 goto out; 497 case TCP_LISTEN: 498 break; 499 default: 500 /* check if this ICMP message allows revert of backoff. 501 * (see RFC 6069) 502 */ 503 if (!fastopen && type == ICMPV6_DEST_UNREACH && 504 code == ICMPV6_NOROUTE) 505 tcp_ld_RTO_revert(sk, seq); 506 } 507 508 if (!sock_owned_by_user(sk) && np->recverr) { 509 sk->sk_err = err; 510 sk_error_report(sk); 511 } else 512 sk->sk_err_soft = err; 513 514 out: 515 bh_unlock_sock(sk); 516 sock_put(sk); 517 return 0; 518 } 519 520 521 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, 522 struct flowi *fl, 523 struct request_sock *req, 524 struct tcp_fastopen_cookie *foc, 525 enum tcp_synack_type synack_type, 526 struct sk_buff *syn_skb) 527 { 528 struct inet_request_sock *ireq = inet_rsk(req); 529 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 530 struct ipv6_txoptions *opt; 531 struct flowi6 *fl6 = &fl->u.ip6; 532 struct sk_buff *skb; 533 int err = -ENOMEM; 534 u8 tclass; 535 536 /* First, grab a route. */ 537 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, 538 IPPROTO_TCP)) == NULL) 539 goto done; 540 541 skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); 542 543 if (skb) { 544 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, 545 &ireq->ir_v6_rmt_addr); 546 547 fl6->daddr = ireq->ir_v6_rmt_addr; 548 if (np->repflow && ireq->pktopts) 549 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); 550 551 tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? 552 (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | 553 (np->tclass & INET_ECN_MASK) : 554 np->tclass; 555 556 if (!INET_ECN_is_capable(tclass) && 557 tcp_bpf_ca_needs_ecn((struct sock *)req)) 558 tclass |= INET_ECN_ECT_0; 559 560 rcu_read_lock(); 561 opt = ireq->ipv6_opt; 562 if (!opt) 563 opt = rcu_dereference(np->opt); 564 err = ip6_xmit(sk, skb, fl6, skb->mark ? 
: sk->sk_mark, opt, 565 tclass, sk->sk_priority); 566 rcu_read_unlock(); 567 err = net_xmit_eval(err); 568 } 569 570 done: 571 return err; 572 } 573 574 575 static void tcp_v6_reqsk_destructor(struct request_sock *req) 576 { 577 kfree(inet_rsk(req)->ipv6_opt); 578 consume_skb(inet_rsk(req)->pktopts); 579 } 580 581 #ifdef CONFIG_TCP_MD5SIG 582 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, 583 const struct in6_addr *addr, 584 int l3index) 585 { 586 return tcp_md5_do_lookup(sk, l3index, 587 (union tcp_md5_addr *)addr, AF_INET6); 588 } 589 590 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, 591 const struct sock *addr_sk) 592 { 593 int l3index; 594 595 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), 596 addr_sk->sk_bound_dev_if); 597 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, 598 l3index); 599 } 600 601 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, 602 sockptr_t optval, int optlen) 603 { 604 struct tcp_md5sig cmd; 605 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; 606 int l3index = 0; 607 u8 prefixlen; 608 u8 flags; 609 610 if (optlen < sizeof(cmd)) 611 return -EINVAL; 612 613 if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) 614 return -EFAULT; 615 616 if (sin6->sin6_family != AF_INET6) 617 return -EINVAL; 618 619 flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; 620 621 if (optname == TCP_MD5SIG_EXT && 622 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { 623 prefixlen = cmd.tcpm_prefixlen; 624 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && 625 prefixlen > 32)) 626 return -EINVAL; 627 } else { 628 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; 629 } 630 631 if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && 632 cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { 633 struct net_device *dev; 634 635 rcu_read_lock(); 636 dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); 637 if (dev && netif_is_l3_master(dev)) 638 l3index = dev->ifindex; 639 rcu_read_unlock(); 640 641 /* ok to reference set/not set outside of rcu; 642 * right now device MUST be an L3 master 643 */ 644 if (!dev || !l3index) 645 return -EINVAL; 646 } 647 648 if (!cmd.tcpm_keylen) { 649 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 650 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 651 AF_INET, prefixlen, 652 l3index, flags); 653 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 654 AF_INET6, prefixlen, l3index, flags); 655 } 656 657 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) 658 return -EINVAL; 659 660 if (ipv6_addr_v4mapped(&sin6->sin6_addr)) 661 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], 662 AF_INET, prefixlen, l3index, flags, 663 cmd.tcpm_key, cmd.tcpm_keylen, 664 GFP_KERNEL); 665 666 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr, 667 AF_INET6, prefixlen, l3index, flags, 668 cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); 669 } 670 671 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp, 672 const struct in6_addr *daddr, 673 const struct in6_addr *saddr, 674 const struct tcphdr *th, int nbytes) 675 { 676 struct tcp6_pseudohdr *bp; 677 struct scatterlist sg; 678 struct tcphdr *_th; 679 680 bp = hp->scratch; 681 /* 1. 
TCP pseudo-header (RFC2460) */ 682 bp->saddr = *saddr; 683 bp->daddr = *daddr; 684 bp->protocol = cpu_to_be32(IPPROTO_TCP); 685 bp->len = cpu_to_be32(nbytes); 686 687 _th = (struct tcphdr *)(bp + 1); 688 memcpy(_th, th, sizeof(*th)); 689 _th->check = 0; 690 691 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); 692 ahash_request_set_crypt(hp->md5_req, &sg, NULL, 693 sizeof(*bp) + sizeof(*th)); 694 return crypto_ahash_update(hp->md5_req); 695 } 696 697 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, 698 const struct in6_addr *daddr, struct in6_addr *saddr, 699 const struct tcphdr *th) 700 { 701 struct tcp_md5sig_pool *hp; 702 struct ahash_request *req; 703 704 hp = tcp_get_md5sig_pool(); 705 if (!hp) 706 goto clear_hash_noput; 707 req = hp->md5_req; 708 709 if (crypto_ahash_init(req)) 710 goto clear_hash; 711 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2)) 712 goto clear_hash; 713 if (tcp_md5_hash_key(hp, key)) 714 goto clear_hash; 715 ahash_request_set_crypt(req, NULL, md5_hash, 0); 716 if (crypto_ahash_final(req)) 717 goto clear_hash; 718 719 tcp_put_md5sig_pool(); 720 return 0; 721 722 clear_hash: 723 tcp_put_md5sig_pool(); 724 clear_hash_noput: 725 memset(md5_hash, 0, 16); 726 return 1; 727 } 728 729 static int tcp_v6_md5_hash_skb(char *md5_hash, 730 const struct tcp_md5sig_key *key, 731 const struct sock *sk, 732 const struct sk_buff *skb) 733 { 734 const struct in6_addr *saddr, *daddr; 735 struct tcp_md5sig_pool *hp; 736 struct ahash_request *req; 737 const struct tcphdr *th = tcp_hdr(skb); 738 739 if (sk) { /* valid for establish/request sockets */ 740 saddr = &sk->sk_v6_rcv_saddr; 741 daddr = &sk->sk_v6_daddr; 742 } else { 743 const struct ipv6hdr *ip6h = ipv6_hdr(skb); 744 saddr = &ip6h->saddr; 745 daddr = &ip6h->daddr; 746 } 747 748 hp = tcp_get_md5sig_pool(); 749 if (!hp) 750 goto clear_hash_noput; 751 req = hp->md5_req; 752 753 if (crypto_ahash_init(req)) 754 goto clear_hash; 755 756 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len)) 757 goto clear_hash; 758 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2)) 759 goto clear_hash; 760 if (tcp_md5_hash_key(hp, key)) 761 goto clear_hash; 762 ahash_request_set_crypt(req, NULL, md5_hash, 0); 763 if (crypto_ahash_final(req)) 764 goto clear_hash; 765 766 tcp_put_md5sig_pool(); 767 return 0; 768 769 clear_hash: 770 tcp_put_md5sig_pool(); 771 clear_hash_noput: 772 memset(md5_hash, 0, 16); 773 return 1; 774 } 775 776 #endif 777 778 static void tcp_v6_init_req(struct request_sock *req, 779 const struct sock *sk_listener, 780 struct sk_buff *skb) 781 { 782 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); 783 struct inet_request_sock *ireq = inet_rsk(req); 784 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); 785 786 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; 787 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; 788 789 /* So that link locals have meaning */ 790 if ((!sk_listener->sk_bound_dev_if || l3_slave) && 791 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) 792 ireq->ir_iif = tcp_v6_iif(skb); 793 794 if (!TCP_SKB_CB(skb)->tcp_tw_isn && 795 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || 796 np->rxopt.bits.rxinfo || 797 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || 798 np->rxopt.bits.rxohlim || np->repflow)) { 799 refcount_inc(&skb->users); 800 ireq->pktopts = skb; 801 } 802 } 803 804 static struct dst_entry *tcp_v6_route_req(const struct sock *sk, 805 struct sk_buff *skb, 806 struct flowi *fl, 807 struct request_sock 
*req) 808 { 809 tcp_v6_init_req(req, sk, skb); 810 811 if (security_inet_conn_request(sk, skb, req)) 812 return NULL; 813 814 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); 815 } 816 817 struct request_sock_ops tcp6_request_sock_ops __read_mostly = { 818 .family = AF_INET6, 819 .obj_size = sizeof(struct tcp6_request_sock), 820 .rtx_syn_ack = tcp_rtx_synack, 821 .send_ack = tcp_v6_reqsk_send_ack, 822 .destructor = tcp_v6_reqsk_destructor, 823 .send_reset = tcp_v6_send_reset, 824 .syn_ack_timeout = tcp_syn_ack_timeout, 825 }; 826 827 const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { 828 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - 829 sizeof(struct ipv6hdr), 830 #ifdef CONFIG_TCP_MD5SIG 831 .req_md5_lookup = tcp_v6_md5_lookup, 832 .calc_md5_hash = tcp_v6_md5_hash_skb, 833 #endif 834 #ifdef CONFIG_SYN_COOKIES 835 .cookie_init_seq = cookie_v6_init_sequence, 836 #endif 837 .route_req = tcp_v6_route_req, 838 .init_seq = tcp_v6_init_seq, 839 .init_ts_off = tcp_v6_init_ts_off, 840 .send_synack = tcp_v6_send_synack, 841 }; 842 843 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, 844 u32 ack, u32 win, u32 tsval, u32 tsecr, 845 int oif, struct tcp_md5sig_key *key, int rst, 846 u8 tclass, __be32 label, u32 priority) 847 { 848 const struct tcphdr *th = tcp_hdr(skb); 849 struct tcphdr *t1; 850 struct sk_buff *buff; 851 struct flowi6 fl6; 852 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); 853 struct sock *ctl_sk = net->ipv6.tcp_sk; 854 unsigned int tot_len = sizeof(struct tcphdr); 855 __be32 mrst = 0, *topt; 856 struct dst_entry *dst; 857 __u32 mark = 0; 858 859 if (tsecr) 860 tot_len += TCPOLEN_TSTAMP_ALIGNED; 861 #ifdef CONFIG_TCP_MD5SIG 862 if (key) 863 tot_len += TCPOLEN_MD5SIG_ALIGNED; 864 #endif 865 866 #ifdef CONFIG_MPTCP 867 if (rst && !key) { 868 mrst = mptcp_reset_option(skb); 869 870 if (mrst) 871 tot_len += sizeof(__be32); 872 } 873 #endif 874 875 buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); 876 if (!buff) 877 return; 878 879 skb_reserve(buff, MAX_TCP_HEADER); 880 881 t1 = skb_push(buff, tot_len); 882 skb_reset_transport_header(buff); 883 884 /* Swap the send and the receive. 
*/ 885 memset(t1, 0, sizeof(*t1)); 886 t1->dest = th->source; 887 t1->source = th->dest; 888 t1->doff = tot_len / 4; 889 t1->seq = htonl(seq); 890 t1->ack_seq = htonl(ack); 891 t1->ack = !rst || !th->ack; 892 t1->rst = rst; 893 t1->window = htons(win); 894 895 topt = (__be32 *)(t1 + 1); 896 897 if (tsecr) { 898 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 899 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); 900 *topt++ = htonl(tsval); 901 *topt++ = htonl(tsecr); 902 } 903 904 if (mrst) 905 *topt++ = mrst; 906 907 #ifdef CONFIG_TCP_MD5SIG 908 if (key) { 909 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | 910 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); 911 tcp_v6_md5_hash_hdr((__u8 *)topt, key, 912 &ipv6_hdr(skb)->saddr, 913 &ipv6_hdr(skb)->daddr, t1); 914 } 915 #endif 916 917 memset(&fl6, 0, sizeof(fl6)); 918 fl6.daddr = ipv6_hdr(skb)->saddr; 919 fl6.saddr = ipv6_hdr(skb)->daddr; 920 fl6.flowlabel = label; 921 922 buff->ip_summed = CHECKSUM_PARTIAL; 923 924 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); 925 926 fl6.flowi6_proto = IPPROTO_TCP; 927 if (rt6_need_strict(&fl6.daddr) && !oif) 928 fl6.flowi6_oif = tcp_v6_iif(skb); 929 else { 930 if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) 931 oif = skb->skb_iif; 932 933 fl6.flowi6_oif = oif; 934 } 935 936 if (sk) { 937 if (sk->sk_state == TCP_TIME_WAIT) { 938 mark = inet_twsk(sk)->tw_mark; 939 /* autoflowlabel relies on buff->hash */ 940 skb_set_hash(buff, inet_twsk(sk)->tw_txhash, 941 PKT_HASH_TYPE_L4); 942 } else { 943 mark = sk->sk_mark; 944 } 945 skb_set_delivery_time(buff, tcp_transmit_time(sk), true); 946 } 947 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; 948 fl6.fl6_dport = t1->dest; 949 fl6.fl6_sport = t1->source; 950 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); 951 security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); 952 953 /* Pass a socket to ip6_dst_lookup either it is for RST 954 * Underlying function will use this to retrieve the network 955 * namespace 956 */ 957 if (sk && sk->sk_state != TCP_TIME_WAIT) 958 dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ 959 else 960 dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); 961 if (!IS_ERR(dst)) { 962 skb_dst_set(buff, dst); 963 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, 964 tclass & ~INET_ECN_MASK, priority); 965 TCP_INC_STATS(net, TCP_MIB_OUTSEGS); 966 if (rst) 967 TCP_INC_STATS(net, TCP_MIB_OUTRSTS); 968 return; 969 } 970 971 kfree_skb(buff); 972 } 973 974 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) 975 { 976 const struct tcphdr *th = tcp_hdr(skb); 977 struct ipv6hdr *ipv6h = ipv6_hdr(skb); 978 u32 seq = 0, ack_seq = 0; 979 struct tcp_md5sig_key *key = NULL; 980 #ifdef CONFIG_TCP_MD5SIG 981 const __u8 *hash_location = NULL; 982 unsigned char newhash[16]; 983 int genhash; 984 struct sock *sk1 = NULL; 985 #endif 986 __be32 label = 0; 987 u32 priority = 0; 988 struct net *net; 989 int oif = 0; 990 991 if (th->rst) 992 return; 993 994 /* If sk not NULL, it means we did a successful lookup and incoming 995 * route had to be correct. prequeue might have dropped our dst. 996 */ 997 if (!sk && !ipv6_unicast_destination(skb)) 998 return; 999 1000 net = sk ? 
sock_net(sk) : dev_net(skb_dst(skb)->dev); 1001 #ifdef CONFIG_TCP_MD5SIG 1002 rcu_read_lock(); 1003 hash_location = tcp_parse_md5sig_option(th); 1004 if (sk && sk_fullsock(sk)) { 1005 int l3index; 1006 1007 /* sdif set, means packet ingressed via a device 1008 * in an L3 domain and inet_iif is set to it. 1009 */ 1010 l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; 1011 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); 1012 } else if (hash_location) { 1013 int dif = tcp_v6_iif_l3_slave(skb); 1014 int sdif = tcp_v6_sdif(skb); 1015 int l3index; 1016 1017 /* 1018 * active side is lost. Try to find listening socket through 1019 * source port, and then find md5 key through listening socket. 1020 * we are not loose security here: 1021 * Incoming packet is checked with md5 hash with finding key, 1022 * no RST generated if md5 hash doesn't match. 1023 */ 1024 sk1 = inet6_lookup_listener(net, 1025 &tcp_hashinfo, NULL, 0, 1026 &ipv6h->saddr, 1027 th->source, &ipv6h->daddr, 1028 ntohs(th->source), dif, sdif); 1029 if (!sk1) 1030 goto out; 1031 1032 /* sdif set, means packet ingressed via a device 1033 * in an L3 domain and dif is set to it. 1034 */ 1035 l3index = tcp_v6_sdif(skb) ? dif : 0; 1036 1037 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); 1038 if (!key) 1039 goto out; 1040 1041 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); 1042 if (genhash || memcmp(hash_location, newhash, 16) != 0) 1043 goto out; 1044 } 1045 #endif 1046 1047 if (th->ack) 1048 seq = ntohl(th->ack_seq); 1049 else 1050 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - 1051 (th->doff << 2); 1052 1053 if (sk) { 1054 oif = sk->sk_bound_dev_if; 1055 if (sk_fullsock(sk)) { 1056 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1057 1058 trace_tcp_send_reset(sk, skb); 1059 if (np->repflow) 1060 label = ip6_flowlabel(ipv6h); 1061 priority = sk->sk_priority; 1062 } 1063 if (sk->sk_state == TCP_TIME_WAIT) { 1064 label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); 1065 priority = inet_twsk(sk)->tw_priority; 1066 } 1067 } else { 1068 if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) 1069 label = ip6_flowlabel(ipv6h); 1070 } 1071 1072 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 1073 ipv6_get_dsfield(ipv6h), label, priority); 1074 1075 #ifdef CONFIG_TCP_MD5SIG 1076 out: 1077 rcu_read_unlock(); 1078 #endif 1079 } 1080 1081 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, 1082 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, 1083 struct tcp_md5sig_key *key, u8 tclass, 1084 __be32 label, u32 priority) 1085 { 1086 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, 1087 tclass, label, priority); 1088 } 1089 1090 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) 1091 { 1092 struct inet_timewait_sock *tw = inet_twsk(sk); 1093 struct tcp_timewait_sock *tcptw = tcp_twsk(sk); 1094 1095 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, 1096 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, 1097 tcp_time_stamp_raw() + tcptw->tw_ts_offset, 1098 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), 1099 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority); 1100 1101 inet_twsk_put(tw); 1102 } 1103 1104 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, 1105 struct request_sock *req) 1106 { 1107 int l3index; 1108 1109 l3index = tcp_v6_sdif(skb) ? 
tcp_v6_iif_l3_slave(skb) : 0; 1110 1111 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV 1112 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. 1113 */ 1114 /* RFC 7323 2.3 1115 * The window field (SEG.WND) of every outgoing segment, with the 1116 * exception of <SYN> segments, MUST be right-shifted by 1117 * Rcv.Wind.Shift bits: 1118 */ 1119 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? 1120 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, 1121 tcp_rsk(req)->rcv_nxt, 1122 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, 1123 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, 1124 req->ts_recent, sk->sk_bound_dev_if, 1125 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index), 1126 ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority); 1127 } 1128 1129 1130 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) 1131 { 1132 #ifdef CONFIG_SYN_COOKIES 1133 const struct tcphdr *th = tcp_hdr(skb); 1134 1135 if (!th->syn) 1136 sk = cookie_v6_check(sk, skb); 1137 #endif 1138 return sk; 1139 } 1140 1141 u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, 1142 struct tcphdr *th, u32 *cookie) 1143 { 1144 u16 mss = 0; 1145 #ifdef CONFIG_SYN_COOKIES 1146 mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, 1147 &tcp_request_sock_ipv6_ops, sk, th); 1148 if (mss) { 1149 *cookie = __cookie_v6_init_sequence(iph, th, &mss); 1150 tcp_synq_overflow(sk); 1151 } 1152 #endif 1153 return mss; 1154 } 1155 1156 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) 1157 { 1158 if (skb->protocol == htons(ETH_P_IP)) 1159 return tcp_v4_conn_request(sk, skb); 1160 1161 if (!ipv6_unicast_destination(skb)) 1162 goto drop; 1163 1164 if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { 1165 __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); 1166 return 0; 1167 } 1168 1169 return tcp_conn_request(&tcp6_request_sock_ops, 1170 &tcp_request_sock_ipv6_ops, sk, skb); 1171 1172 drop: 1173 tcp_listendrop(sk); 1174 return 0; /* don't send reset */ 1175 } 1176 1177 static void tcp_v6_restore_cb(struct sk_buff *skb) 1178 { 1179 /* We need to move header back to the beginning if xfrm6_policy_check() 1180 * and tcp_v6_fill_cb() are going to be called again. 1181 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. 
1182 */ 1183 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, 1184 sizeof(struct inet6_skb_parm)); 1185 } 1186 1187 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, 1188 struct request_sock *req, 1189 struct dst_entry *dst, 1190 struct request_sock *req_unhash, 1191 bool *own_req) 1192 { 1193 struct inet_request_sock *ireq; 1194 struct ipv6_pinfo *newnp; 1195 const struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1196 struct ipv6_txoptions *opt; 1197 struct inet_sock *newinet; 1198 bool found_dup_sk = false; 1199 struct tcp_sock *newtp; 1200 struct sock *newsk; 1201 #ifdef CONFIG_TCP_MD5SIG 1202 struct tcp_md5sig_key *key; 1203 int l3index; 1204 #endif 1205 struct flowi6 fl6; 1206 1207 if (skb->protocol == htons(ETH_P_IP)) { 1208 /* 1209 * v6 mapped 1210 */ 1211 1212 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, 1213 req_unhash, own_req); 1214 1215 if (!newsk) 1216 return NULL; 1217 1218 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1219 1220 newnp = tcp_inet6_sk(newsk); 1221 newtp = tcp_sk(newsk); 1222 1223 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1224 1225 newnp->saddr = newsk->sk_v6_rcv_saddr; 1226 1227 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; 1228 if (sk_is_mptcp(newsk)) 1229 mptcpv6_handle_mapped(newsk, true); 1230 newsk->sk_backlog_rcv = tcp_v4_do_rcv; 1231 #ifdef CONFIG_TCP_MD5SIG 1232 newtp->af_specific = &tcp_sock_ipv6_mapped_specific; 1233 #endif 1234 1235 newnp->ipv6_mc_list = NULL; 1236 newnp->ipv6_ac_list = NULL; 1237 newnp->ipv6_fl_list = NULL; 1238 newnp->pktoptions = NULL; 1239 newnp->opt = NULL; 1240 newnp->mcast_oif = inet_iif(skb); 1241 newnp->mcast_hops = ip_hdr(skb)->ttl; 1242 newnp->rcv_flowinfo = 0; 1243 if (np->repflow) 1244 newnp->flow_label = 0; 1245 1246 /* 1247 * No need to charge this sock to the relevant IPv6 refcnt debug socks count 1248 * here, tcp_create_openreq_child now does this for us, see the comment in 1249 * that function for the gory details. -acme 1250 */ 1251 1252 /* It is tricky place. Until this moment IPv4 tcp 1253 worked with IPv6 icsk.icsk_af_ops. 1254 Sync it now. 1255 */ 1256 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); 1257 1258 return newsk; 1259 } 1260 1261 ireq = inet_rsk(req); 1262 1263 if (sk_acceptq_is_full(sk)) 1264 goto out_overflow; 1265 1266 if (!dst) { 1267 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); 1268 if (!dst) 1269 goto out; 1270 } 1271 1272 newsk = tcp_create_openreq_child(sk, req, skb); 1273 if (!newsk) 1274 goto out_nonewsk; 1275 1276 /* 1277 * No need to charge this sock to the relevant IPv6 refcnt debug socks 1278 * count here, tcp_create_openreq_child now does this for us, see the 1279 * comment in that function for the gory details. -acme 1280 */ 1281 1282 newsk->sk_gso_type = SKB_GSO_TCPV6; 1283 ip6_dst_store(newsk, dst, NULL, NULL); 1284 inet6_sk_rx_dst_set(newsk, skb); 1285 1286 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); 1287 1288 newtp = tcp_sk(newsk); 1289 newinet = inet_sk(newsk); 1290 newnp = tcp_inet6_sk(newsk); 1291 1292 memcpy(newnp, np, sizeof(struct ipv6_pinfo)); 1293 1294 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; 1295 newnp->saddr = ireq->ir_v6_loc_addr; 1296 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; 1297 newsk->sk_bound_dev_if = ireq->ir_iif; 1298 1299 /* Now IPv6 options... 1300 1301 First: no IPv4 options. 
1302 */ 1303 newinet->inet_opt = NULL; 1304 newnp->ipv6_mc_list = NULL; 1305 newnp->ipv6_ac_list = NULL; 1306 newnp->ipv6_fl_list = NULL; 1307 1308 /* Clone RX bits */ 1309 newnp->rxopt.all = np->rxopt.all; 1310 1311 newnp->pktoptions = NULL; 1312 newnp->opt = NULL; 1313 newnp->mcast_oif = tcp_v6_iif(skb); 1314 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; 1315 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); 1316 if (np->repflow) 1317 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); 1318 1319 /* Set ToS of the new socket based upon the value of incoming SYN. 1320 * ECT bits are set later in tcp_init_transfer(). 1321 */ 1322 if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) 1323 newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; 1324 1325 /* Clone native IPv6 options from listening socket (if any) 1326 1327 Yes, keeping reference count would be much more clever, 1328 but we make one more one thing there: reattach optmem 1329 to newsk. 1330 */ 1331 opt = ireq->ipv6_opt; 1332 if (!opt) 1333 opt = rcu_dereference(np->opt); 1334 if (opt) { 1335 opt = ipv6_dup_options(newsk, opt); 1336 RCU_INIT_POINTER(newnp->opt, opt); 1337 } 1338 inet_csk(newsk)->icsk_ext_hdr_len = 0; 1339 if (opt) 1340 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + 1341 opt->opt_flen; 1342 1343 tcp_ca_openreq_child(newsk, dst); 1344 1345 tcp_sync_mss(newsk, dst_mtu(dst)); 1346 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); 1347 1348 tcp_initialize_rcv_mss(newsk); 1349 1350 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; 1351 newinet->inet_rcv_saddr = LOOPBACK4_IPV6; 1352 1353 #ifdef CONFIG_TCP_MD5SIG 1354 l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); 1355 1356 /* Copy over the MD5 key from the original socket */ 1357 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); 1358 if (key) { 1359 /* We're using one, so create a matching key 1360 * on the newsk structure. If we fail to get 1361 * memory, then we end up not copying the key 1362 * across. Shucks. 1363 */ 1364 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr, 1365 AF_INET6, 128, l3index, key->flags, key->key, key->keylen, 1366 sk_gfp_mask(sk, GFP_ATOMIC)); 1367 } 1368 #endif 1369 1370 if (__inet_inherit_port(sk, newsk) < 0) { 1371 inet_csk_prepare_forced_close(newsk); 1372 tcp_done(newsk); 1373 goto out; 1374 } 1375 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), 1376 &found_dup_sk); 1377 if (*own_req) { 1378 tcp_move_syn(newtp, req); 1379 1380 /* Clone pktoptions received with SYN, if we own the req */ 1381 if (ireq->pktopts) { 1382 newnp->pktoptions = skb_clone(ireq->pktopts, 1383 sk_gfp_mask(sk, GFP_ATOMIC)); 1384 consume_skb(ireq->pktopts); 1385 ireq->pktopts = NULL; 1386 if (newnp->pktoptions) { 1387 tcp_v6_restore_cb(newnp->pktoptions); 1388 skb_set_owner_r(newnp->pktoptions, newsk); 1389 } 1390 } 1391 } else { 1392 if (!req_unhash && found_dup_sk) { 1393 /* This code path should only be executed in the 1394 * syncookie case only 1395 */ 1396 bh_unlock_sock(newsk); 1397 sock_put(newsk); 1398 newsk = NULL; 1399 } 1400 } 1401 1402 return newsk; 1403 1404 out_overflow: 1405 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); 1406 out_nonewsk: 1407 dst_release(dst); 1408 out: 1409 tcp_listendrop(sk); 1410 return NULL; 1411 } 1412 1413 INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, 1414 u32)); 1415 /* The socket must have it's spinlock held when we get 1416 * here, unless it is a TCP_LISTEN socket. 
1417 * 1418 * We have a potential double-lock case here, so even when 1419 * doing backlog processing we use the BH locking scheme. 1420 * This is because we cannot sleep with the original spinlock 1421 * held. 1422 */ 1423 INDIRECT_CALLABLE_SCOPE 1424 int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) 1425 { 1426 struct ipv6_pinfo *np = tcp_inet6_sk(sk); 1427 struct sk_buff *opt_skb = NULL; 1428 enum skb_drop_reason reason; 1429 struct tcp_sock *tp; 1430 1431 /* Imagine: socket is IPv6. IPv4 packet arrives, 1432 goes to IPv4 receive handler and backlogged. 1433 From backlog it always goes here. Kerboom... 1434 Fortunately, tcp_rcv_established and rcv_established 1435 handle them correctly, but it is not case with 1436 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK 1437 */ 1438 1439 if (skb->protocol == htons(ETH_P_IP)) 1440 return tcp_v4_do_rcv(sk, skb); 1441 1442 /* 1443 * socket locking is here for SMP purposes as backlog rcv 1444 * is currently called with bh processing disabled. 1445 */ 1446 1447 /* Do Stevens' IPV6_PKTOPTIONS. 1448 1449 Yes, guys, it is the only place in our code, where we 1450 may make it not affecting IPv4. 1451 The rest of code is protocol independent, 1452 and I do not like idea to uglify IPv4. 1453 1454 Actually, all the idea behind IPV6_PKTOPTIONS 1455 looks not very well thought. For now we latch 1456 options, received in the last packet, enqueued 1457 by tcp. Feel free to propose better solution. 1458 --ANK (980728) 1459 */ 1460 if (np->rxopt.all) 1461 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC)); 1462 1463 reason = SKB_DROP_REASON_NOT_SPECIFIED; 1464 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ 1465 struct dst_entry *dst; 1466 1467 dst = rcu_dereference_protected(sk->sk_rx_dst, 1468 lockdep_sock_is_held(sk)); 1469 1470 sock_rps_save_rxhash(sk, skb); 1471 sk_mark_napi_id(sk, skb); 1472 if (dst) { 1473 if (sk->sk_rx_dst_ifindex != skb->skb_iif || 1474 INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, 1475 dst, sk->sk_rx_dst_cookie) == NULL) { 1476 RCU_INIT_POINTER(sk->sk_rx_dst, NULL); 1477 dst_release(dst); 1478 } 1479 } 1480 1481 tcp_rcv_established(sk, skb); 1482 if (opt_skb) 1483 goto ipv6_pktoptions; 1484 return 0; 1485 } 1486 1487 if (tcp_checksum_complete(skb)) 1488 goto csum_err; 1489 1490 if (sk->sk_state == TCP_LISTEN) { 1491 struct sock *nsk = tcp_v6_cookie_check(sk, skb); 1492 1493 if (!nsk) 1494 goto discard; 1495 1496 if (nsk != sk) { 1497 if (tcp_child_process(sk, nsk, skb)) 1498 goto reset; 1499 if (opt_skb) 1500 __kfree_skb(opt_skb); 1501 return 0; 1502 } 1503 } else 1504 sock_rps_save_rxhash(sk, skb); 1505 1506 if (tcp_rcv_state_process(sk, skb)) 1507 goto reset; 1508 if (opt_skb) 1509 goto ipv6_pktoptions; 1510 return 0; 1511 1512 reset: 1513 tcp_v6_send_reset(sk, skb); 1514 discard: 1515 if (opt_skb) 1516 __kfree_skb(opt_skb); 1517 kfree_skb_reason(skb, reason); 1518 return 0; 1519 csum_err: 1520 reason = SKB_DROP_REASON_TCP_CSUM; 1521 trace_tcp_bad_csum(skb); 1522 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); 1523 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); 1524 goto discard; 1525 1526 1527 ipv6_pktoptions: 1528 /* Do you ask, what is it? 1529 1530 1. skb was enqueued by tcp. 1531 2. skb is added to tail of read queue, rather than out of order. 1532 3. socket is not in passive state. 1533 4. Finally, it really contains options, which user wants to receive. 
1534 */ 1535 tp = tcp_sk(sk); 1536 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && 1537 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { 1538 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) 1539 np->mcast_oif = tcp_v6_iif(opt_skb); 1540 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) 1541 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; 1542 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) 1543 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); 1544 if (np->repflow) 1545 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); 1546 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { 1547 skb_set_owner_r(opt_skb, sk); 1548 tcp_v6_restore_cb(opt_skb); 1549 opt_skb = xchg(&np->pktoptions, opt_skb); 1550 } else { 1551 __kfree_skb(opt_skb); 1552 opt_skb = xchg(&np->pktoptions, NULL); 1553 } 1554 } 1555 1556 consume_skb(opt_skb); 1557 return 0; 1558 } 1559 1560 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, 1561 const struct tcphdr *th) 1562 { 1563 /* This is tricky: we move IP6CB at its correct location into 1564 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because 1565 * _decode_session6() uses IP6CB(). 1566 * barrier() makes sure compiler won't play aliasing games. 1567 */ 1568 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), 1569 sizeof(struct inet6_skb_parm)); 1570 barrier(); 1571 1572 TCP_SKB_CB(skb)->seq = ntohl(th->seq); 1573 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + 1574 skb->len - th->doff*4); 1575 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); 1576 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); 1577 TCP_SKB_CB(skb)->tcp_tw_isn = 0; 1578 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); 1579 TCP_SKB_CB(skb)->sacked = 0; 1580 TCP_SKB_CB(skb)->has_rxtstamp = 1581 skb->tstamp || skb_hwtstamps(skb)->hwtstamp; 1582 } 1583 1584 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) 1585 { 1586 enum skb_drop_reason drop_reason; 1587 int sdif = inet6_sdif(skb); 1588 int dif = inet6_iif(skb); 1589 const struct tcphdr *th; 1590 const struct ipv6hdr *hdr; 1591 bool refcounted; 1592 struct sock *sk; 1593 int ret; 1594 struct net *net = dev_net(skb->dev); 1595 1596 drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; 1597 if (skb->pkt_type != PACKET_HOST) 1598 goto discard_it; 1599 1600 /* 1601 * Count it even if it's bad. 
1602 */ 1603 __TCP_INC_STATS(net, TCP_MIB_INSEGS); 1604 1605 if (!pskb_may_pull(skb, sizeof(struct tcphdr))) 1606 goto discard_it; 1607 1608 th = (const struct tcphdr *)skb->data; 1609 1610 if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { 1611 drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; 1612 goto bad_packet; 1613 } 1614 if (!pskb_may_pull(skb, th->doff*4)) 1615 goto discard_it; 1616 1617 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) 1618 goto csum_error; 1619 1620 th = (const struct tcphdr *)skb->data; 1621 hdr = ipv6_hdr(skb); 1622 1623 lookup: 1624 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), 1625 th->source, th->dest, inet6_iif(skb), sdif, 1626 &refcounted); 1627 if (!sk) 1628 goto no_tcp_socket; 1629 1630 process: 1631 if (sk->sk_state == TCP_TIME_WAIT) 1632 goto do_time_wait; 1633 1634 if (sk->sk_state == TCP_NEW_SYN_RECV) { 1635 struct request_sock *req = inet_reqsk(sk); 1636 bool req_stolen = false; 1637 struct sock *nsk; 1638 1639 sk = req->rsk_listener; 1640 drop_reason = tcp_inbound_md5_hash(sk, skb, 1641 &hdr->saddr, &hdr->daddr, 1642 AF_INET6, dif, sdif); 1643 if (drop_reason) { 1644 sk_drops_add(sk, skb); 1645 reqsk_put(req); 1646 goto discard_it; 1647 } 1648 if (tcp_checksum_complete(skb)) { 1649 reqsk_put(req); 1650 goto csum_error; 1651 } 1652 if (unlikely(sk->sk_state != TCP_LISTEN)) { 1653 nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); 1654 if (!nsk) { 1655 inet_csk_reqsk_queue_drop_and_put(sk, req); 1656 goto lookup; 1657 } 1658 sk = nsk; 1659 /* reuseport_migrate_sock() has already held one sk_refcnt 1660 * before returning. 1661 */ 1662 } else { 1663 sock_hold(sk); 1664 } 1665 refcounted = true; 1666 nsk = NULL; 1667 if (!tcp_filter(sk, skb)) { 1668 th = (const struct tcphdr *)skb->data; 1669 hdr = ipv6_hdr(skb); 1670 tcp_v6_fill_cb(skb, hdr, th); 1671 nsk = tcp_check_req(sk, skb, req, false, &req_stolen); 1672 } else { 1673 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1674 } 1675 if (!nsk) { 1676 reqsk_put(req); 1677 if (req_stolen) { 1678 /* Another cpu got exclusive access to req 1679 * and created a full blown socket. 1680 * Try to feed this packet to this socket 1681 * instead of discarding it. 
1682 */ 1683 tcp_v6_restore_cb(skb); 1684 sock_put(sk); 1685 goto lookup; 1686 } 1687 goto discard_and_relse; 1688 } 1689 if (nsk == sk) { 1690 reqsk_put(req); 1691 tcp_v6_restore_cb(skb); 1692 } else if (tcp_child_process(sk, nsk, skb)) { 1693 tcp_v6_send_reset(nsk, skb); 1694 goto discard_and_relse; 1695 } else { 1696 sock_put(sk); 1697 return 0; 1698 } 1699 } 1700 1701 if (static_branch_unlikely(&ip6_min_hopcount)) { 1702 /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ 1703 if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { 1704 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); 1705 goto discard_and_relse; 1706 } 1707 } 1708 1709 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { 1710 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1711 goto discard_and_relse; 1712 } 1713 1714 drop_reason = tcp_inbound_md5_hash(sk, skb, &hdr->saddr, &hdr->daddr, 1715 AF_INET6, dif, sdif); 1716 if (drop_reason) 1717 goto discard_and_relse; 1718 1719 if (tcp_filter(sk, skb)) { 1720 drop_reason = SKB_DROP_REASON_SOCKET_FILTER; 1721 goto discard_and_relse; 1722 } 1723 th = (const struct tcphdr *)skb->data; 1724 hdr = ipv6_hdr(skb); 1725 tcp_v6_fill_cb(skb, hdr, th); 1726 1727 skb->dev = NULL; 1728 1729 if (sk->sk_state == TCP_LISTEN) { 1730 ret = tcp_v6_do_rcv(sk, skb); 1731 goto put_and_return; 1732 } 1733 1734 sk_incoming_cpu_update(sk); 1735 1736 bh_lock_sock_nested(sk); 1737 tcp_segs_in(tcp_sk(sk), skb); 1738 ret = 0; 1739 if (!sock_owned_by_user(sk)) { 1740 ret = tcp_v6_do_rcv(sk, skb); 1741 } else { 1742 if (tcp_add_backlog(sk, skb, &drop_reason)) 1743 goto discard_and_relse; 1744 } 1745 bh_unlock_sock(sk); 1746 put_and_return: 1747 if (refcounted) 1748 sock_put(sk); 1749 return ret ? -1 : 0; 1750 1751 no_tcp_socket: 1752 drop_reason = SKB_DROP_REASON_NO_SOCKET; 1753 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) 1754 goto discard_it; 1755 1756 tcp_v6_fill_cb(skb, hdr, th); 1757 1758 if (tcp_checksum_complete(skb)) { 1759 csum_error: 1760 drop_reason = SKB_DROP_REASON_TCP_CSUM; 1761 trace_tcp_bad_csum(skb); 1762 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); 1763 bad_packet: 1764 __TCP_INC_STATS(net, TCP_MIB_INERRS); 1765 } else { 1766 tcp_v6_send_reset(NULL, skb); 1767 } 1768 1769 discard_it: 1770 SKB_DR_OR(drop_reason, NOT_SPECIFIED); 1771 kfree_skb_reason(skb, drop_reason); 1772 return 0; 1773 1774 discard_and_relse: 1775 sk_drops_add(sk, skb); 1776 if (refcounted) 1777 sock_put(sk); 1778 goto discard_it; 1779 1780 do_time_wait: 1781 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { 1782 drop_reason = SKB_DROP_REASON_XFRM_POLICY; 1783 inet_twsk_put(inet_twsk(sk)); 1784 goto discard_it; 1785 } 1786 1787 tcp_v6_fill_cb(skb, hdr, th); 1788 1789 if (tcp_checksum_complete(skb)) { 1790 inet_twsk_put(inet_twsk(sk)); 1791 goto csum_error; 1792 } 1793 1794 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { 1795 case TCP_TW_SYN: 1796 { 1797 struct sock *sk2; 1798 1799 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, 1800 skb, __tcp_hdrlen(th), 1801 &ipv6_hdr(skb)->saddr, th->source, 1802 &ipv6_hdr(skb)->daddr, 1803 ntohs(th->dest), 1804 tcp_v6_iif_l3_slave(skb), 1805 sdif); 1806 if (sk2) { 1807 struct inet_timewait_sock *tw = inet_twsk(sk); 1808 inet_twsk_deschedule_put(tw); 1809 sk = sk2; 1810 tcp_v6_restore_cb(skb); 1811 refcounted = false; 1812 goto process; 1813 } 1814 } 1815 /* to ACK */ 1816 fallthrough; 1817 case TCP_TW_ACK: 1818 tcp_v6_timewait_ack(sk, skb); 1819 break; 1820 case TCP_TW_RST: 1821 tcp_v6_send_reset(sk, skb); 1822 
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, sk->sk_rx_dst_cookie);
			if (dst &&
			    sk->sk_rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	__tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr);
}

const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tcp_snd_cwnd(tp),
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.bpf_bypass_getsockopt	= tcp_bpf_bypass_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.put_port		= inet_put_port,
#ifdef CONFIG_BPF_SYSCALL
	.psock_update_sk_prot	= tcp_bpf_update_proto,
#endif
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,

	.memory_allocated	= &tcp_memory_allocated,
	.per_cpu_fw_alloc	= &tcp_memory_per_cpu_fw_alloc,

	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

static const struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT |
			  INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}