1 // SPDX-License-Identifier: GPL-2.0 2 3 #include <linux/types.h> 4 #include <linux/netfilter.h> 5 #include <linux/module.h> 6 #include <linux/slab.h> 7 #include <linux/mutex.h> 8 #include <linux/vmalloc.h> 9 #include <linux/stddef.h> 10 #include <linux/err.h> 11 #include <linux/percpu.h> 12 #include <linux/notifier.h> 13 #include <linux/kernel.h> 14 #include <linux/netdevice.h> 15 16 #include <net/netfilter/nf_conntrack.h> 17 #include <net/netfilter/nf_conntrack_l4proto.h> 18 #include <net/netfilter/nf_conntrack_core.h> 19 #include <net/netfilter/nf_log.h> 20 21 #include <linux/ip.h> 22 #include <linux/icmp.h> 23 #include <linux/sysctl.h> 24 #include <net/route.h> 25 #include <net/ip.h> 26 27 #include <linux/netfilter_ipv4.h> 28 #include <linux/netfilter_ipv6.h> 29 #include <linux/netfilter_ipv6/ip6_tables.h> 30 #include <net/netfilter/nf_conntrack_helper.h> 31 #include <net/netfilter/nf_conntrack_zones.h> 32 #include <net/netfilter/nf_conntrack_seqadj.h> 33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> 34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h> 35 #include <net/netfilter/nf_nat_helper.h> 36 #include <net/netfilter/ipv4/nf_defrag_ipv4.h> 37 #include <net/netfilter/ipv6/nf_defrag_ipv6.h> 38 39 #include <linux/ipv6.h> 40 #include <linux/in6.h> 41 #include <net/ipv6.h> 42 #include <net/inet_frag.h> 43 44 extern unsigned int nf_conntrack_net_id; 45 46 static DEFINE_MUTEX(nf_ct_proto_mutex); 47 48 #ifdef CONFIG_SYSCTL 49 __printf(5, 6) 50 void nf_l4proto_log_invalid(const struct sk_buff *skb, 51 struct net *net, 52 u16 pf, u8 protonum, 53 const char *fmt, ...) 54 { 55 struct va_format vaf; 56 va_list args; 57 58 if (net->ct.sysctl_log_invalid != protonum && 59 net->ct.sysctl_log_invalid != IPPROTO_RAW) 60 return; 61 62 va_start(args, fmt); 63 vaf.fmt = fmt; 64 vaf.va = &args; 65 66 nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, 67 "nf_ct_proto_%d: %pV ", protonum, &vaf); 68 va_end(args); 69 } 70 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); 71 72 __printf(3, 4) 73 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, 74 const struct nf_conn *ct, 75 const char *fmt, ...) 76 { 77 struct va_format vaf; 78 struct net *net; 79 va_list args; 80 81 net = nf_ct_net(ct); 82 if (likely(net->ct.sysctl_log_invalid == 0)) 83 return; 84 85 va_start(args, fmt); 86 vaf.fmt = fmt; 87 vaf.va = &args; 88 89 nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), 90 nf_ct_protonum(ct), "%pV", &vaf); 91 va_end(args); 92 } 93 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); 94 #endif 95 96 const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) 97 { 98 switch (l4proto) { 99 case IPPROTO_UDP: return &nf_conntrack_l4proto_udp; 100 case IPPROTO_TCP: return &nf_conntrack_l4proto_tcp; 101 case IPPROTO_ICMP: return &nf_conntrack_l4proto_icmp; 102 #ifdef CONFIG_NF_CT_PROTO_DCCP 103 case IPPROTO_DCCP: return &nf_conntrack_l4proto_dccp; 104 #endif 105 #ifdef CONFIG_NF_CT_PROTO_SCTP 106 case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; 107 #endif 108 #ifdef CONFIG_NF_CT_PROTO_UDPLITE 109 case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite; 110 #endif 111 #ifdef CONFIG_NF_CT_PROTO_GRE 112 case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; 113 #endif 114 #if IS_ENABLED(CONFIG_IPV6) 115 case IPPROTO_ICMPV6: return &nf_conntrack_l4proto_icmpv6; 116 #endif /* CONFIG_IPV6 */ 117 } 118 119 return &nf_conntrack_l4proto_generic; 120 }; 121 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find); 122 123 static unsigned int nf_confirm(struct sk_buff *skb, 124 unsigned int protoff, 125 struct nf_conn *ct, 126 enum ip_conntrack_info ctinfo) 127 { 128 const struct nf_conn_help *help; 129 130 help = nfct_help(ct); 131 if (help) { 132 const struct nf_conntrack_helper *helper; 133 int ret; 134 135 /* rcu_read_lock()ed by nf_hook_thresh */ 136 helper = rcu_dereference(help->helper); 137 if (helper) { 138 ret = helper->help(skb, 139 protoff, 140 ct, ctinfo); 141 if (ret != NF_ACCEPT) 142 return ret; 143 } 144 } 145 146 if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && 147 !nf_is_loopback_packet(skb)) { 148 if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) { 149 NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); 150 return NF_DROP; 151 } 152 } 153 154 /* We've seen it coming out the other side: confirm it */ 155 return nf_conntrack_confirm(skb); 156 } 157 158 static unsigned int ipv4_confirm(void *priv, 159 struct sk_buff *skb, 160 const struct nf_hook_state *state) 161 { 162 enum ip_conntrack_info ctinfo; 163 struct nf_conn *ct; 164 165 ct = nf_ct_get(skb, &ctinfo); 166 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 167 return nf_conntrack_confirm(skb); 168 169 return nf_confirm(skb, 170 skb_network_offset(skb) + ip_hdrlen(skb), 171 ct, ctinfo); 172 } 173 174 static unsigned int ipv4_conntrack_in(void *priv, 175 struct sk_buff *skb, 176 const struct nf_hook_state *state) 177 { 178 return nf_conntrack_in(skb, state); 179 } 180 181 static unsigned int ipv4_conntrack_local(void *priv, 182 struct sk_buff *skb, 183 const struct nf_hook_state *state) 184 { 185 if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */ 186 enum ip_conntrack_info ctinfo; 187 struct nf_conn *tmpl; 188 189 tmpl = nf_ct_get(skb, &ctinfo); 190 if (tmpl && nf_ct_is_template(tmpl)) { 191 /* when skipping ct, clear templates to avoid fooling 192 * later targets/matches 193 */ 194 skb->_nfct = 0; 195 nf_ct_put(tmpl); 196 } 197 return NF_ACCEPT; 198 } 199 200 return nf_conntrack_in(skb, state); 201 } 202 203 /* Connection tracking may drop packets, but never alters them, so 204 * make it the first hook. 205 */ 206 static const struct nf_hook_ops ipv4_conntrack_ops[] = { 207 { 208 .hook = ipv4_conntrack_in, 209 .pf = NFPROTO_IPV4, 210 .hooknum = NF_INET_PRE_ROUTING, 211 .priority = NF_IP_PRI_CONNTRACK, 212 }, 213 { 214 .hook = ipv4_conntrack_local, 215 .pf = NFPROTO_IPV4, 216 .hooknum = NF_INET_LOCAL_OUT, 217 .priority = NF_IP_PRI_CONNTRACK, 218 }, 219 { 220 .hook = ipv4_confirm, 221 .pf = NFPROTO_IPV4, 222 .hooknum = NF_INET_POST_ROUTING, 223 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 224 }, 225 { 226 .hook = ipv4_confirm, 227 .pf = NFPROTO_IPV4, 228 .hooknum = NF_INET_LOCAL_IN, 229 .priority = NF_IP_PRI_CONNTRACK_CONFIRM, 230 }, 231 }; 232 233 /* Fast function for those who don't want to parse /proc (and I don't 234 * blame them). 235 * Reversing the socket's dst/src point of view gives us the reply 236 * mapping. 237 */ 238 static int 239 getorigdst(struct sock *sk, int optval, void __user *user, int *len) 240 { 241 const struct inet_sock *inet = inet_sk(sk); 242 const struct nf_conntrack_tuple_hash *h; 243 struct nf_conntrack_tuple tuple; 244 245 memset(&tuple, 0, sizeof(tuple)); 246 247 lock_sock(sk); 248 tuple.src.u3.ip = inet->inet_rcv_saddr; 249 tuple.src.u.tcp.port = inet->inet_sport; 250 tuple.dst.u3.ip = inet->inet_daddr; 251 tuple.dst.u.tcp.port = inet->inet_dport; 252 tuple.src.l3num = PF_INET; 253 tuple.dst.protonum = sk->sk_protocol; 254 release_sock(sk); 255 256 /* We only do TCP and SCTP at the moment: is there a better way? */ 257 if (tuple.dst.protonum != IPPROTO_TCP && 258 tuple.dst.protonum != IPPROTO_SCTP) { 259 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); 260 return -ENOPROTOOPT; 261 } 262 263 if ((unsigned int)*len < sizeof(struct sockaddr_in)) { 264 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n", 265 *len, sizeof(struct sockaddr_in)); 266 return -EINVAL; 267 } 268 269 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 270 if (h) { 271 struct sockaddr_in sin; 272 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); 273 274 sin.sin_family = AF_INET; 275 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] 276 .tuple.dst.u.tcp.port; 277 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] 278 .tuple.dst.u3.ip; 279 memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); 280 281 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", 282 &sin.sin_addr.s_addr, ntohs(sin.sin_port)); 283 nf_ct_put(ct); 284 if (copy_to_user(user, &sin, sizeof(sin)) != 0) 285 return -EFAULT; 286 else 287 return 0; 288 } 289 pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", 290 &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), 291 &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); 292 return -ENOENT; 293 } 294 295 static struct nf_sockopt_ops so_getorigdst = { 296 .pf = PF_INET, 297 .get_optmin = SO_ORIGINAL_DST, 298 .get_optmax = SO_ORIGINAL_DST + 1, 299 .get = getorigdst, 300 .owner = THIS_MODULE, 301 }; 302 303 #if IS_ENABLED(CONFIG_IPV6) 304 static int 305 ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) 306 { 307 struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; 308 const struct ipv6_pinfo *inet6 = inet6_sk(sk); 309 const struct inet_sock *inet = inet_sk(sk); 310 const struct nf_conntrack_tuple_hash *h; 311 struct sockaddr_in6 sin6; 312 struct nf_conn *ct; 313 __be32 flow_label; 314 int bound_dev_if; 315 316 lock_sock(sk); 317 tuple.src.u3.in6 = sk->sk_v6_rcv_saddr; 318 tuple.src.u.tcp.port = inet->inet_sport; 319 tuple.dst.u3.in6 = sk->sk_v6_daddr; 320 tuple.dst.u.tcp.port = inet->inet_dport; 321 tuple.dst.protonum = sk->sk_protocol; 322 bound_dev_if = sk->sk_bound_dev_if; 323 flow_label = inet6->flow_label; 324 release_sock(sk); 325 326 if (tuple.dst.protonum != IPPROTO_TCP && 327 tuple.dst.protonum != IPPROTO_SCTP) 328 return -ENOPROTOOPT; 329 330 if (*len < 0 || (unsigned int)*len < sizeof(sin6)) 331 return -EINVAL; 332 333 h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple); 334 if (!h) { 335 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", 336 &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), 337 &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); 338 return -ENOENT; 339 } 340 341 ct = nf_ct_tuplehash_to_ctrack(h); 342 343 sin6.sin6_family = AF_INET6; 344 sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; 345 sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK; 346 memcpy(&sin6.sin6_addr, 347 &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, 348 sizeof(sin6.sin6_addr)); 349 350 nf_ct_put(ct); 351 sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if); 352 return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; 353 } 354 355 static struct nf_sockopt_ops so_getorigdst6 = { 356 .pf = NFPROTO_IPV6, 357 .get_optmin = IP6T_SO_ORIGINAL_DST, 358 .get_optmax = IP6T_SO_ORIGINAL_DST + 1, 359 .get = ipv6_getorigdst, 360 .owner = THIS_MODULE, 361 }; 362 363 static unsigned int ipv6_confirm(void *priv, 364 struct sk_buff *skb, 365 const struct nf_hook_state *state) 366 { 367 struct nf_conn *ct; 368 enum ip_conntrack_info ctinfo; 369 unsigned char pnum = ipv6_hdr(skb)->nexthdr; 370 __be16 frag_off; 371 int protoff; 372 373 ct = nf_ct_get(skb, &ctinfo); 374 if (!ct || ctinfo == IP_CT_RELATED_REPLY) 375 return nf_conntrack_confirm(skb); 376 377 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, 378 &frag_off); 379 if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { 380 pr_debug("proto header not found\n"); 381 return nf_conntrack_confirm(skb); 382 } 383 384 return nf_confirm(skb, protoff, ct, ctinfo); 385 } 386 387 static unsigned int ipv6_conntrack_in(void *priv, 388 struct sk_buff *skb, 389 const struct nf_hook_state *state) 390 { 391 return nf_conntrack_in(skb, state); 392 } 393 394 static unsigned int ipv6_conntrack_local(void *priv, 395 struct sk_buff *skb, 396 const struct nf_hook_state *state) 397 { 398 return nf_conntrack_in(skb, state); 399 } 400 401 static const struct nf_hook_ops ipv6_conntrack_ops[] = { 402 { 403 .hook = ipv6_conntrack_in, 404 .pf = NFPROTO_IPV6, 405 .hooknum = NF_INET_PRE_ROUTING, 406 .priority = NF_IP6_PRI_CONNTRACK, 407 }, 408 { 409 .hook = ipv6_conntrack_local, 410 .pf = NFPROTO_IPV6, 411 .hooknum = NF_INET_LOCAL_OUT, 412 .priority = NF_IP6_PRI_CONNTRACK, 413 }, 414 { 415 .hook = ipv6_confirm, 416 .pf = NFPROTO_IPV6, 417 .hooknum = NF_INET_POST_ROUTING, 418 .priority = NF_IP6_PRI_LAST, 419 }, 420 { 421 .hook = ipv6_confirm, 422 .pf = NFPROTO_IPV6, 423 .hooknum = NF_INET_LOCAL_IN, 424 .priority = NF_IP6_PRI_LAST - 1, 425 }, 426 }; 427 #endif 428 429 static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto) 430 { 431 u8 nfproto = (unsigned long)_nfproto; 432 433 if (nf_ct_l3num(ct) != nfproto) 434 return 0; 435 436 if (nf_ct_protonum(ct) == IPPROTO_TCP && 437 ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) { 438 ct->proto.tcp.seen[0].td_maxwin = 0; 439 ct->proto.tcp.seen[1].td_maxwin = 0; 440 } 441 442 return 0; 443 } 444 445 static int nf_ct_netns_do_get(struct net *net, u8 nfproto) 446 { 447 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); 448 bool fixup_needed = false; 449 int err = 0; 450 451 mutex_lock(&nf_ct_proto_mutex); 452 453 switch (nfproto) { 454 case NFPROTO_IPV4: 455 cnet->users4++; 456 if (cnet->users4 > 1) 457 goto out_unlock; 458 err = nf_defrag_ipv4_enable(net); 459 if (err) { 460 cnet->users4 = 0; 461 goto out_unlock; 462 } 463 464 err = nf_register_net_hooks(net, ipv4_conntrack_ops, 465 ARRAY_SIZE(ipv4_conntrack_ops)); 466 if (err) 467 cnet->users4 = 0; 468 else 469 fixup_needed = true; 470 break; 471 #if IS_ENABLED(CONFIG_IPV6) 472 case NFPROTO_IPV6: 473 cnet->users6++; 474 if (cnet->users6 > 1) 475 goto out_unlock; 476 err = nf_defrag_ipv6_enable(net); 477 if (err < 0) { 478 cnet->users6 = 0; 479 goto out_unlock; 480 } 481 482 err = nf_register_net_hooks(net, ipv6_conntrack_ops, 483 ARRAY_SIZE(ipv6_conntrack_ops)); 484 if (err) 485 cnet->users6 = 0; 486 else 487 fixup_needed = true; 488 break; 489 #endif 490 default: 491 err = -EPROTO; 492 break; 493 } 494 out_unlock: 495 mutex_unlock(&nf_ct_proto_mutex); 496 497 if (fixup_needed) 498 nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup, 499 (void *)(unsigned long)nfproto, 0, 0); 500 501 return err; 502 } 503 504 static void nf_ct_netns_do_put(struct net *net, u8 nfproto) 505 { 506 struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id); 507 508 mutex_lock(&nf_ct_proto_mutex); 509 switch (nfproto) { 510 case NFPROTO_IPV4: 511 if (cnet->users4 && (--cnet->users4 == 0)) 512 nf_unregister_net_hooks(net, ipv4_conntrack_ops, 513 ARRAY_SIZE(ipv4_conntrack_ops)); 514 break; 515 #if IS_ENABLED(CONFIG_IPV6) 516 case NFPROTO_IPV6: 517 if (cnet->users6 && (--cnet->users6 == 0)) 518 nf_unregister_net_hooks(net, ipv6_conntrack_ops, 519 ARRAY_SIZE(ipv6_conntrack_ops)); 520 break; 521 #endif 522 } 523 524 mutex_unlock(&nf_ct_proto_mutex); 525 } 526 527 int nf_ct_netns_get(struct net *net, u8 nfproto) 528 { 529 int err; 530 531 if (nfproto == NFPROTO_INET) { 532 err = nf_ct_netns_do_get(net, NFPROTO_IPV4); 533 if (err < 0) 534 goto err1; 535 err = nf_ct_netns_do_get(net, NFPROTO_IPV6); 536 if (err < 0) 537 goto err2; 538 } else { 539 err = nf_ct_netns_do_get(net, nfproto); 540 if (err < 0) 541 goto err1; 542 } 543 return 0; 544 545 err2: 546 nf_ct_netns_put(net, NFPROTO_IPV4); 547 err1: 548 return err; 549 } 550 EXPORT_SYMBOL_GPL(nf_ct_netns_get); 551 552 void nf_ct_netns_put(struct net *net, uint8_t nfproto) 553 { 554 if (nfproto == NFPROTO_INET) { 555 nf_ct_netns_do_put(net, NFPROTO_IPV4); 556 nf_ct_netns_do_put(net, NFPROTO_IPV6); 557 } else { 558 nf_ct_netns_do_put(net, nfproto); 559 } 560 } 561 EXPORT_SYMBOL_GPL(nf_ct_netns_put); 562 563 int nf_conntrack_proto_init(void) 564 { 565 int ret; 566 567 ret = nf_register_sockopt(&so_getorigdst); 568 if (ret < 0) 569 return ret; 570 571 #if IS_ENABLED(CONFIG_IPV6) 572 ret = nf_register_sockopt(&so_getorigdst6); 573 if (ret < 0) 574 goto cleanup_sockopt; 575 #endif 576 577 return ret; 578 579 #if IS_ENABLED(CONFIG_IPV6) 580 cleanup_sockopt: 581 nf_unregister_sockopt(&so_getorigdst6); 582 #endif 583 return ret; 584 } 585 586 void nf_conntrack_proto_fini(void) 587 { 588 nf_unregister_sockopt(&so_getorigdst); 589 #if IS_ENABLED(CONFIG_IPV6) 590 nf_unregister_sockopt(&so_getorigdst6); 591 #endif 592 } 593 594 void nf_conntrack_proto_pernet_init(struct net *net) 595 { 596 nf_conntrack_generic_init_net(net); 597 nf_conntrack_udp_init_net(net); 598 nf_conntrack_tcp_init_net(net); 599 nf_conntrack_icmp_init_net(net); 600 #if IS_ENABLED(CONFIG_IPV6) 601 nf_conntrack_icmpv6_init_net(net); 602 #endif 603 #ifdef CONFIG_NF_CT_PROTO_DCCP 604 nf_conntrack_dccp_init_net(net); 605 #endif 606 #ifdef CONFIG_NF_CT_PROTO_SCTP 607 nf_conntrack_sctp_init_net(net); 608 #endif 609 #ifdef CONFIG_NF_CT_PROTO_GRE 610 nf_conntrack_gre_init_net(net); 611 #endif 612 } 613 614 void nf_conntrack_proto_pernet_fini(struct net *net) 615 { 616 #ifdef CONFIG_NF_CT_PROTO_GRE 617 nf_ct_gre_keymap_flush(net); 618 #endif 619 } 620 621 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, 622 &nf_conntrack_htable_size, 0600); 623 624 MODULE_ALIAS("ip_conntrack"); 625 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); 626 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); 627 MODULE_LICENSE("GPL"); 628
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.