// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * IPVS         An implementation of the IP virtual server support for the
 *              LINUX operating system.  IPVS is now implemented as a module
 *              over the NetFilter framework. IPVS can be used to build a
 *              high-performance and highly available server based on a
 *              cluster of servers.
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *              Peter Kese <peter.kese@ijs.si>
 *              Julian Anastasov <ja@ssi.bg>
 *
 * Changes:
 */

/* Prefix every pr_*() message emitted from this file with "IPVS: ". */
#define KMSG_COMPONENT "IPVS"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/proc_fs.h>
#include <linux/workqueue.h>
#include <linux/swap.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/mutex.h>

#include <net/net_namespace.h>
#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
#endif
#include <net/route.h>
#include <net/sock.h>
#include <net/genetlink.h>

#include <linux/uaccess.h>

#include <net/ip_vs.h>

/* mutex for IPVS sockopts. And, [gs]etsockopt may sleep. */
static DEFINE_MUTEX(__ip_vs_mutex);

/* sysctl variables */

#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Return the current value of sysctl_ip_vs_debug_level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif


/*  Protos */
static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup);


#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
*/ 72 static bool __ip_vs_addr_is_local_v6(struct net *net, 73 const struct in6_addr *addr) 74 { 75 struct flowi6 fl6 = { 76 .daddr = *addr, 77 }; 78 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 79 bool is_local; 80 81 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 82 83 dst_release(dst); 84 return is_local; 85 } 86 #endif 87 88 #ifdef CONFIG_SYSCTL 89 /* 90 * update_defense_level is called from keventd and from sysctl, 91 * so it needs to protect itself from softirqs 92 */ 93 static void update_defense_level(struct netns_ipvs *ipvs) 94 { 95 struct sysinfo i; 96 int availmem; 97 int nomem; 98 int to_change = -1; 99 100 /* we only count free and buffered memory (in pages) */ 101 si_meminfo(&i); 102 availmem = i.freeram + i.bufferram; 103 /* however in linux 2.5 the i.bufferram is total page cache size, 104 we need adjust it */ 105 /* si_swapinfo(&i); */ 106 /* availmem = availmem - (i.totalswap - i.freeswap); */ 107 108 nomem = (availmem < ipvs->sysctl_amemthresh); 109 110 local_bh_disable(); 111 112 /* drop_entry */ 113 spin_lock(&ipvs->dropentry_lock); 114 switch (ipvs->sysctl_drop_entry) { 115 case 0: 116 atomic_set(&ipvs->dropentry, 0); 117 break; 118 case 1: 119 if (nomem) { 120 atomic_set(&ipvs->dropentry, 1); 121 ipvs->sysctl_drop_entry = 2; 122 } else { 123 atomic_set(&ipvs->dropentry, 0); 124 } 125 break; 126 case 2: 127 if (nomem) { 128 atomic_set(&ipvs->dropentry, 1); 129 } else { 130 atomic_set(&ipvs->dropentry, 0); 131 ipvs->sysctl_drop_entry = 1; 132 } 133 break; 134 case 3: 135 atomic_set(&ipvs->dropentry, 1); 136 break; 137 } 138 spin_unlock(&ipvs->dropentry_lock); 139 140 /* drop_packet */ 141 spin_lock(&ipvs->droppacket_lock); 142 switch (ipvs->sysctl_drop_packet) { 143 case 0: 144 ipvs->drop_rate = 0; 145 break; 146 case 1: 147 if (nomem) { 148 ipvs->drop_rate = ipvs->drop_counter 149 = ipvs->sysctl_amemthresh / 150 (ipvs->sysctl_amemthresh-availmem); 151 ipvs->sysctl_drop_packet = 2; 152 } else { 153 
ipvs->drop_rate = 0; 154 } 155 break; 156 case 2: 157 if (nomem) { 158 ipvs->drop_rate = ipvs->drop_counter 159 = ipvs->sysctl_amemthresh / 160 (ipvs->sysctl_amemthresh-availmem); 161 } else { 162 ipvs->drop_rate = 0; 163 ipvs->sysctl_drop_packet = 1; 164 } 165 break; 166 case 3: 167 ipvs->drop_rate = ipvs->sysctl_am_droprate; 168 break; 169 } 170 spin_unlock(&ipvs->droppacket_lock); 171 172 /* secure_tcp */ 173 spin_lock(&ipvs->securetcp_lock); 174 switch (ipvs->sysctl_secure_tcp) { 175 case 0: 176 if (ipvs->old_secure_tcp >= 2) 177 to_change = 0; 178 break; 179 case 1: 180 if (nomem) { 181 if (ipvs->old_secure_tcp < 2) 182 to_change = 1; 183 ipvs->sysctl_secure_tcp = 2; 184 } else { 185 if (ipvs->old_secure_tcp >= 2) 186 to_change = 0; 187 } 188 break; 189 case 2: 190 if (nomem) { 191 if (ipvs->old_secure_tcp < 2) 192 to_change = 1; 193 } else { 194 if (ipvs->old_secure_tcp >= 2) 195 to_change = 0; 196 ipvs->sysctl_secure_tcp = 1; 197 } 198 break; 199 case 3: 200 if (ipvs->old_secure_tcp < 2) 201 to_change = 1; 202 break; 203 } 204 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 205 if (to_change >= 0) 206 ip_vs_protocol_timeout_change(ipvs, 207 ipvs->sysctl_secure_tcp > 1); 208 spin_unlock(&ipvs->securetcp_lock); 209 210 local_bh_enable(); 211 } 212 213 /* Handler for delayed work for expiring no 214 * destination connections 215 */ 216 static void expire_nodest_conn_handler(struct work_struct *work) 217 { 218 struct netns_ipvs *ipvs; 219 220 ipvs = container_of(work, struct netns_ipvs, 221 expire_nodest_conn_work.work); 222 ip_vs_expire_nodest_conn_flush(ipvs); 223 } 224 225 /* 226 * Timer for checking the defense 227 */ 228 #define DEFENSE_TIMER_PERIOD 1*HZ 229 230 static void defense_work_handler(struct work_struct *work) 231 { 232 struct netns_ipvs *ipvs = 233 container_of(work, struct netns_ipvs, defense_work.work); 234 235 update_defense_level(ipvs); 236 if (atomic_read(&ipvs->dropentry)) 237 ip_vs_random_dropentry(ipvs); 238 
queue_delayed_work(system_long_wq, &ipvs->defense_work, 239 DEFENSE_TIMER_PERIOD); 240 } 241 #endif 242 243 int 244 ip_vs_use_count_inc(void) 245 { 246 return try_module_get(THIS_MODULE); 247 } 248 249 void 250 ip_vs_use_count_dec(void) 251 { 252 module_put(THIS_MODULE); 253 } 254 255 256 /* 257 * Hash table: for virtual service lookups 258 */ 259 #define IP_VS_SVC_TAB_BITS 8 260 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 261 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 262 263 /* the service table hashed by <protocol, addr, port> */ 264 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 265 /* the service table hashed by fwmark */ 266 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 267 268 269 /* 270 * Returns hash value for virtual service 271 */ 272 static inline unsigned int 273 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 274 const union nf_inet_addr *addr, __be16 port) 275 { 276 unsigned int porth = ntohs(port); 277 __be32 addr_fold = addr->ip; 278 __u32 ahash; 279 280 #ifdef CONFIG_IP_VS_IPV6 281 if (af == AF_INET6) 282 addr_fold = addr->ip6[0]^addr->ip6[1]^ 283 addr->ip6[2]^addr->ip6[3]; 284 #endif 285 ahash = ntohl(addr_fold); 286 ahash ^= ((size_t) ipvs >> 8); 287 288 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 289 IP_VS_SVC_TAB_MASK; 290 } 291 292 /* 293 * Returns hash value of fwmark for virtual service lookup 294 */ 295 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 296 { 297 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 298 } 299 300 /* 301 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 302 * or in the ip_vs_svc_fwm_table by fwmark. 303 * Should be called with locked tables. 
304 */ 305 static int ip_vs_svc_hash(struct ip_vs_service *svc) 306 { 307 unsigned int hash; 308 309 if (svc->flags & IP_VS_SVC_F_HASHED) { 310 pr_err("%s(): request for already hashed, called from %pS\n", 311 __func__, __builtin_return_address(0)); 312 return 0; 313 } 314 315 if (svc->fwmark == 0) { 316 /* 317 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 318 */ 319 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 320 &svc->addr, svc->port); 321 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 322 } else { 323 /* 324 * Hash it by fwmark in svc_fwm_table 325 */ 326 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 327 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 328 } 329 330 svc->flags |= IP_VS_SVC_F_HASHED; 331 /* increase its refcnt because it is referenced by the svc table */ 332 atomic_inc(&svc->refcnt); 333 return 1; 334 } 335 336 337 /* 338 * Unhashes a service from svc_table / svc_fwm_table. 339 * Should be called with locked tables. 340 */ 341 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 342 { 343 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 344 pr_err("%s(): request for unhash flagged, called from %pS\n", 345 __func__, __builtin_return_address(0)); 346 return 0; 347 } 348 349 if (svc->fwmark == 0) { 350 /* Remove it from the svc_table table */ 351 hlist_del_rcu(&svc->s_list); 352 } else { 353 /* Remove it from the svc_fwm_table table */ 354 hlist_del_rcu(&svc->f_list); 355 } 356 357 svc->flags &= ~IP_VS_SVC_F_HASHED; 358 atomic_dec(&svc->refcnt); 359 return 1; 360 } 361 362 363 /* 364 * Get service by {netns, proto,addr,port} in the service table. 
365 */ 366 static inline struct ip_vs_service * 367 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 368 const union nf_inet_addr *vaddr, __be16 vport) 369 { 370 unsigned int hash; 371 struct ip_vs_service *svc; 372 373 /* Check for "full" addressed entries */ 374 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 375 376 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 377 if ((svc->af == af) 378 && ip_vs_addr_equal(af, &svc->addr, vaddr) 379 && (svc->port == vport) 380 && (svc->protocol == protocol) 381 && (svc->ipvs == ipvs)) { 382 /* HIT */ 383 return svc; 384 } 385 } 386 387 return NULL; 388 } 389 390 391 /* 392 * Get service by {fwmark} in the service table. 393 */ 394 static inline struct ip_vs_service * 395 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 396 { 397 unsigned int hash; 398 struct ip_vs_service *svc; 399 400 /* Check for fwmark addressed entries */ 401 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 402 403 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 404 if (svc->fwmark == fwmark && svc->af == af 405 && (svc->ipvs == ipvs)) { 406 /* HIT */ 407 return svc; 408 } 409 } 410 411 return NULL; 412 } 413 414 /* Find service, called under RCU lock */ 415 struct ip_vs_service * 416 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 417 const union nf_inet_addr *vaddr, __be16 vport) 418 { 419 struct ip_vs_service *svc; 420 421 /* 422 * Check the table hashed by fwmark first 423 */ 424 if (fwmark) { 425 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 426 if (svc) 427 goto out; 428 } 429 430 /* 431 * Check the table hashed by <protocol,addr,port> 432 * for "full" addressed entries 433 */ 434 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 435 436 if (!svc && protocol == IPPROTO_TCP && 437 atomic_read(&ipvs->ftpsvc_counter) && 438 (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { 439 /* 440 * Check 
if ftp service entry exists, the packet 441 * might belong to FTP data connections. 442 */ 443 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 444 } 445 446 if (svc == NULL 447 && atomic_read(&ipvs->nullsvc_counter)) { 448 /* 449 * Check if the catch-all port (port zero) exists 450 */ 451 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 452 } 453 454 out: 455 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 456 fwmark, ip_vs_proto_name(protocol), 457 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 458 svc ? "hit" : "not hit"); 459 460 return svc; 461 } 462 463 464 static inline void 465 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 466 { 467 atomic_inc(&svc->refcnt); 468 rcu_assign_pointer(dest->svc, svc); 469 } 470 471 static void ip_vs_service_free(struct ip_vs_service *svc) 472 { 473 free_percpu(svc->stats.cpustats); 474 kfree(svc); 475 } 476 477 static void ip_vs_service_rcu_free(struct rcu_head *head) 478 { 479 struct ip_vs_service *svc; 480 481 svc = container_of(head, struct ip_vs_service, rcu_head); 482 ip_vs_service_free(svc); 483 } 484 485 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 486 { 487 if (atomic_dec_and_test(&svc->refcnt)) { 488 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 489 svc->fwmark, 490 IP_VS_DBG_ADDR(svc->af, &svc->addr), 491 ntohs(svc->port)); 492 if (do_delay) 493 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 494 else 495 ip_vs_service_free(svc); 496 } 497 } 498 499 500 /* 501 * Returns hash value for real service 502 */ 503 static inline unsigned int ip_vs_rs_hashkey(int af, 504 const union nf_inet_addr *addr, 505 __be16 port) 506 { 507 unsigned int porth = ntohs(port); 508 __be32 addr_fold = addr->ip; 509 510 #ifdef CONFIG_IP_VS_IPV6 511 if (af == AF_INET6) 512 addr_fold = addr->ip6[0]^addr->ip6[1]^ 513 addr->ip6[2]^addr->ip6[3]; 514 #endif 515 516 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 517 & IP_VS_RTAB_MASK; 518 } 519 520 /* Hash 
ip_vs_dest in rs_table by <proto,addr,port>. */ 521 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 522 { 523 unsigned int hash; 524 __be16 port; 525 526 if (dest->in_rs_table) 527 return; 528 529 switch (IP_VS_DFWD_METHOD(dest)) { 530 case IP_VS_CONN_F_MASQ: 531 port = dest->port; 532 break; 533 case IP_VS_CONN_F_TUNNEL: 534 switch (dest->tun_type) { 535 case IP_VS_CONN_F_TUNNEL_TYPE_GUE: 536 port = dest->tun_port; 537 break; 538 case IP_VS_CONN_F_TUNNEL_TYPE_IPIP: 539 case IP_VS_CONN_F_TUNNEL_TYPE_GRE: 540 port = 0; 541 break; 542 default: 543 return; 544 } 545 break; 546 default: 547 return; 548 } 549 550 /* 551 * Hash by proto,addr,port, 552 * which are the parameters of the real service. 553 */ 554 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, port); 555 556 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 557 dest->in_rs_table = 1; 558 } 559 560 /* Unhash ip_vs_dest from rs_table. */ 561 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 562 { 563 /* 564 * Remove it from the rs_table table. 565 */ 566 if (dest->in_rs_table) { 567 hlist_del_rcu(&dest->d_list); 568 dest->in_rs_table = 0; 569 } 570 } 571 572 /* Check if real service by <proto,addr,port> is present */ 573 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 574 const union nf_inet_addr *daddr, __be16 dport) 575 { 576 unsigned int hash; 577 struct ip_vs_dest *dest; 578 579 /* Check for "full" addressed entries */ 580 hash = ip_vs_rs_hashkey(af, daddr, dport); 581 582 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 583 if (dest->port == dport && 584 dest->af == af && 585 ip_vs_addr_equal(af, &dest->addr, daddr) && 586 (dest->protocol == protocol || dest->vfwmark) && 587 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 588 /* HIT */ 589 return true; 590 } 591 } 592 593 return false; 594 } 595 596 /* Find real service record by <proto,addr,port>. 
597 * In case of multiple records with the same <proto,addr,port>, only 598 * the first found record is returned. 599 * 600 * To be called under RCU lock. 601 */ 602 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 603 __u16 protocol, 604 const union nf_inet_addr *daddr, 605 __be16 dport) 606 { 607 unsigned int hash; 608 struct ip_vs_dest *dest; 609 610 /* Check for "full" addressed entries */ 611 hash = ip_vs_rs_hashkey(af, daddr, dport); 612 613 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 614 if (dest->port == dport && 615 dest->af == af && 616 ip_vs_addr_equal(af, &dest->addr, daddr) && 617 (dest->protocol == protocol || dest->vfwmark) && 618 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_MASQ) { 619 /* HIT */ 620 return dest; 621 } 622 } 623 624 return NULL; 625 } 626 627 /* Find real service record by <af,addr,tun_port>. 628 * In case of multiple records with the same <af,addr,tun_port>, only 629 * the first found record is returned. 630 * 631 * To be called under RCU lock. 632 */ 633 struct ip_vs_dest *ip_vs_find_tunnel(struct netns_ipvs *ipvs, int af, 634 const union nf_inet_addr *daddr, 635 __be16 tun_port) 636 { 637 struct ip_vs_dest *dest; 638 unsigned int hash; 639 640 /* Check for "full" addressed entries */ 641 hash = ip_vs_rs_hashkey(af, daddr, tun_port); 642 643 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 644 if (dest->tun_port == tun_port && 645 dest->af == af && 646 ip_vs_addr_equal(af, &dest->addr, daddr) && 647 IP_VS_DFWD_METHOD(dest) == IP_VS_CONN_F_TUNNEL) { 648 /* HIT */ 649 return dest; 650 } 651 } 652 653 return NULL; 654 } 655 656 /* Lookup destination by {addr,port} in the given service 657 * Called under RCU lock. 
*/
static struct ip_vs_dest *
ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af,
		  const union nf_inet_addr *daddr, __be16 dport)
{
	struct ip_vs_dest *dest;

	/*
	 * Find the destination for the given service
	 */
	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
		if ((dest->af == dest_af) &&
		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
		    (dest->port == dport)) {
			/* HIT */
			return dest;
		}
	}

	return NULL;
}

/*
 * Find destination by {daddr,dport,vaddr,protocol}
 * Created to be used in ip_vs_process_message() in
 * the backup synchronization daemon. It finds the
 * destination to be bound to the received connection
 * on the backup.
 * Called under RCU lock, no refcnt is returned.
 */
struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af,
				   const union nf_inet_addr *daddr,
				   __be16 dport,
				   const union nf_inet_addr *vaddr,
				   __be16 vport, __u16 protocol, __u32 fwmark,
				   __u32 flags)
{
	struct ip_vs_dest *dest;
	struct ip_vs_service *svc;
	__be16 port = dport;

	svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport);
	if (!svc)
		return NULL;
	/* fwmark service with a non-masquerading method: try port 0 first */
	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ)
		port = 0;
	dest = ip_vs_lookup_dest(svc, dest_af, daddr, port);
	/*
	 * Retry with the other candidate: port ^ dport is 0 when port is
	 * still dport, and dport when port was reset to 0 above.
	 */
	if (!dest)
		dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport);
	return dest;
}

/* RCU callback: release the cached dst and free the ip_vs_dest_dst. */
void ip_vs_dest_dst_rcu_free(struct rcu_head *head)
{
	struct ip_vs_dest_dst *dest_dst = container_of(head,
						       struct ip_vs_dest_dst,
						       rcu_head);

	dst_release(dest_dst->dst_cache);
	kfree(dest_dst);
}

/* Release dest_dst and dst_cache for dest in user context */
static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest)
{
	struct ip_vs_dest_dst *old;

	old = rcu_dereference_protected(dest->dest_dst, 1);
	if (old) {
		/* unpublish first, free through call_rcu() afterwards */
		RCU_INIT_POINTER(dest->dest_dst, NULL);
		call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free);
	}
}

/*
 *  Lookup dest by {svc,addr,port} in the destination trash.
 *  The destination trash is used to hold the destinations that are removed
 *  from the service table but are still referenced by some conn entries.
 *  The reason to add the destination trash is when the dest is temporary
 *  down (either by administrator or by monitor program), the dest can be
 *  picked back from the trash, the remaining connections to the dest can
 *  continue, and the counting information of the dest is also useful for
 *  scheduling.
 *
 *  On a hit the dest is unlinked from the trash list before it is
 *  returned; NULL is returned when nothing matches.
 */
static struct ip_vs_dest *
ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af,
		     const union nf_inet_addr *daddr, __be16 dport)
{
	struct ip_vs_dest *dest;
	struct netns_ipvs *ipvs = svc->ipvs;

	/*
	 * Find the destination in trash
	 */
	spin_lock_bh(&ipvs->dest_trash_lock);
	list_for_each_entry(dest, &ipvs->dest_trash, t_list) {
		IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
			      "dest->refcnt=%d\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port),
			      refcount_read(&dest->refcnt));
		/*
		 * Same real server AND same virtual service: for fwmark
		 * services the mark is enough, otherwise vaddr/vport must
		 * match too.
		 */
		if (dest->af == dest_af &&
		    ip_vs_addr_equal(dest_af, &dest->addr, daddr) &&
		    dest->port == dport &&
		    dest->vfwmark == svc->fwmark &&
		    dest->protocol == svc->protocol &&
		    (svc->fwmark ||
		     (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
		      dest->vport == svc->port))) {
			/* HIT */
			list_del(&dest->t_list);
			goto out;
		}
	}

	dest = NULL;

out:
	spin_unlock_bh(&ipvs->dest_trash_lock);

	return dest;
}

/*
 * Final teardown of a dest: reset its dst cache, drop its service
 * reference (freed without RCU delay), free the per-cpu stats and put
 * the dest itself.
 */
static void ip_vs_dest_free(struct ip_vs_dest *dest)
{
	struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1);

	__ip_vs_dst_cache_reset(dest);
	__ip_vs_svc_put(svc, false);
	free_percpu(dest->stats.cpustats);
	ip_vs_dest_put_and_free(dest);
}

/*
 *  Clean up all the destinations in the trash
 *  Called by the ip_vs_control_cleanup()
 *
 *  When the ip_vs_control_cleanup is activated by ipvs module exit,
 *  the service tables must have been flushed and all the connections
 *  are expired, and the refcnt of each destination in the trash must
 *  be 1, so we simply release them here.
 */
static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs)
{
	struct ip_vs_dest *dest, *nxt;

	del_timer_sync(&ipvs->dest_trash_timer);
	/* No need to use dest_trash_lock */
	list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) {
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
}

/*
 * Copy the counters of @src into @dst relative to the zero point
 * (kstats - kstats0) and read the estimator rates, all under src->lock.
 */
static void
ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c

	spin_lock_bh(&src->lock);

	IP_VS_SHOW_STATS_COUNTER(conns);
	IP_VS_SHOW_STATS_COUNTER(inpkts);
	IP_VS_SHOW_STATS_COUNTER(outpkts);
	IP_VS_SHOW_STATS_COUNTER(inbytes);
	IP_VS_SHOW_STATS_COUNTER(outbytes);

	ip_vs_read_estimator(dst, src);

	spin_unlock_bh(&src->lock);
}

/*
 * Convert kernel stats to the user-visible layout. Every field except
 * inbytes/outbytes is explicitly cast to u32.
 */
static void
ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
{
	dst->conns = (u32)src->conns;
	dst->inpkts = (u32)src->inpkts;
	dst->outpkts = (u32)src->outpkts;
	dst->inbytes = src->inbytes;
	dst->outbytes = src->outbytes;
	dst->cps = (u32)src->cps;
	dst->inpps = (u32)src->inpps;
	dst->outpps = (u32)src->outpps;
	dst->inbps = (u32)src->inbps;
	dst->outbps = (u32)src->outbps;
}

/*
 * "Zero" the statistics by recording the current counters as the new
 * zero point (kstats0) and resetting the estimator, under stats->lock.
 */
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
	spin_lock_bh(&stats->lock);

	/* get current counters as zero point, rates are zeroed */

#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c

	IP_VS_ZERO_STATS_COUNTER(conns);
	IP_VS_ZERO_STATS_COUNTER(inpkts);
	IP_VS_ZERO_STATS_COUNTER(outpkts);
	IP_VS_ZERO_STATS_COUNTER(inbytes);
	IP_VS_ZERO_STATS_COUNTER(outbytes);

	ip_vs_zero_estimator(stats);

	spin_unlock_bh(&stats->lock);
}

/*
 *	Update a destination in the given service
 *
 *	@add is non-zero when the dest is being linked into @svc (a new
 *	dest or one recovered from the trash) and zero when an already
 *	linked dest is edited.
 */
static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
		    struct ip_vs_dest_user_kern *udest, int add)
{
	struct netns_ipvs *ipvs = svc->ipvs;
	struct ip_vs_service *old_svc;
	struct ip_vs_scheduler *sched;
	int conn_flags;

	/* We cannot modify an address and change the address family */
	BUG_ON(!add && udest->af != dest->af);

	if (add && udest->af != svc->af)
		ipvs->mixed_address_family_dests++;

	/* keep the last_weight with latest non-0 weight */
	if (add || udest->weight != 0)
		atomic_set(&dest->last_weight, udest->weight);

	/* set the weight and the flags */
	atomic_set(&dest->weight, udest->weight);
	conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
	conn_flags |= IP_VS_CONN_F_INACTIVE;

	/* Need to rehash? */
	if ((udest->conn_flags & IP_VS_CONN_F_FWD_MASK) !=
	    IP_VS_DFWD_METHOD(dest) ||
	    udest->tun_type != dest->tun_type ||
	    udest->tun_port != dest->tun_port)
		ip_vs_rs_unhash(dest);

	/* set the tunnel info */
	dest->tun_type = udest->tun_type;
	dest->tun_port = udest->tun_port;
	dest->tun_flags = udest->tun_flags;

	/* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
	if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
		conn_flags |= IP_VS_CONN_F_NOOUTPUT;
	} else {
		/* FTP-NAT requires conntrack for mangling */
		if (svc->port == FTPPORT)
			ip_vs_register_conntrack(svc);
	}
	atomic_set(&dest->conn_flags, conn_flags);
	/* Put the real service in rs_table if not present. */
	ip_vs_rs_hash(ipvs, dest);

	/* bind the service */
	old_svc = rcu_dereference_protected(dest->svc, 1);
	if (!old_svc) {
		__ip_vs_bind_svc(dest, svc);
	} else {
		if (old_svc != svc) {
			/* moving to another service: restart the stats */
			ip_vs_zero_stats(&dest->stats);
			__ip_vs_bind_svc(dest, svc);
			__ip_vs_svc_put(old_svc, true);
		}
	}

	/* set the dest status flags */
	dest->flags |= IP_VS_DEST_F_AVAILABLE;

	/* upper threshold removed or raised: clear the overload flag */
	if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
		dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
	dest->u_threshold = udest->u_threshold;
	dest->l_threshold = udest->l_threshold;

	dest->af = udest->af;

	spin_lock_bh(&dest->dst_lock);
	__ip_vs_dst_cache_reset(dest);
	spin_unlock_bh(&dest->dst_lock);

	if (add) {
		ip_vs_start_estimator(svc->ipvs, &dest->stats);
		list_add_rcu(&dest->n_list, &svc->destinations);
		svc->num_dests++;
		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->add_dest)
			sched->add_dest(svc, dest);
	} else {
		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->upd_dest)
			sched->upd_dest(svc, dest);
	}
}


/*
 *	Create a destination for the given service
 *
 *	Returns 0 and stores the new dest in *dest_p, -EINVAL for an
 *	address of an unacceptable type, -ENOMEM on allocation failure,
 *	or the error from nf_defrag_ipv6_enable().
 */
static int
ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
	       struct ip_vs_dest **dest_p)
{
	struct ip_vs_dest *dest;
	unsigned int atype, i;

	EnterFunction(2);

#ifdef CONFIG_IP_VS_IPV6
	if (udest->af == AF_INET6) {
		int ret;

		/*
		 * Non-unicast or link-local addresses are accepted only
		 * when they are local to this host.
		 */
		atype = ipv6_addr_type(&udest->addr.in6);
		if ((!(atype & IPV6_ADDR_UNICAST) ||
			atype & IPV6_ADDR_LINKLOCAL) &&
			!__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6))
			return -EINVAL;

		ret = nf_defrag_ipv6_enable(svc->ipvs->net);
		if (ret)
			return ret;
	} else
#endif
	{
		atype = inet_addr_type(svc->ipvs->net, udest->addr.ip);
		if (atype != RTN_LOCAL && atype != RTN_UNICAST)
			return -EINVAL;
	}

	dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
	if (dest == NULL)
		return -ENOMEM;

	dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!dest->stats.cpustats)
		goto err_alloc;

	for_each_possible_cpu(i) {
		struct ip_vs_cpu_stats *ip_vs_dest_stats;
		ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i);
		u64_stats_init(&ip_vs_dest_stats->syncp);
	}

	/* remember which virtual service this dest was created for */
	dest->af = udest->af;
	dest->protocol = svc->protocol;
	dest->vaddr = svc->addr;
	dest->vport = svc->port;
	dest->vfwmark = svc->fwmark;
	ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr);
	dest->port = udest->port;

	atomic_set(&dest->activeconns, 0);
	atomic_set(&dest->inactconns, 0);
	atomic_set(&dest->persistconns, 0);
	refcount_set(&dest->refcnt, 1);

	INIT_HLIST_NODE(&dest->d_list);
	spin_lock_init(&dest->dst_lock);
	spin_lock_init(&dest->stats.lock);
	__ip_vs_update_dest(svc, dest, udest, 1);

	*dest_p = dest;

	LeaveFunction(2);
	return 0;

err_alloc:
	kfree(dest);
	return -ENOMEM;
}


/*
 *	Add a destination into an existing service
 *
 *	Returns -ERANGE / -EINVAL for invalid parameters, -EEXIST when the
 *	dest is already linked to the service, otherwise the result of
 *	reviving it from the trash (0) or of ip_vs_new_dest().
 */
static int
ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;
	int ret;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		if (udest->tun_port == 0) {
			pr_err("%s(): tunnel port is zero\n", __func__);
			return -EINVAL;
		}
	}

	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest != NULL) {
		IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
		return -EEXIST;
	}

	/*
	 * Check if the dest already exists in the trash and
	 * is from the same service
	 */
	dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport);

	if (dest != NULL) {
		IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
			      "dest->refcnt=%d, service %u/%s:%u\n",
			      IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport),
			      refcount_read(&dest->refcnt),
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
			      ntohs(dest->vport));

		__ip_vs_update_dest(svc, dest, udest, 1);
		ret = 0;
	} else {
		/*
		 * Allocate and initialize the dest structure
		 */
		ret = ip_vs_new_dest(svc, udest, &dest);
	}
	LeaveFunction(2);

	return ret;
}


/*
 *	Edit a destination in the given service
 *
 *	Returns -ERANGE / -EINVAL for invalid parameters, -ENOENT when the
 *	dest is not linked to the service, 0 after the update.
 */
static int
ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	union nf_inet_addr daddr;
	__be16 dport = udest->port;

	EnterFunction(2);

	if (udest->weight < 0) {
		pr_err("%s(): server weight less than zero\n", __func__);
		return -ERANGE;
	}

	if (udest->l_threshold > udest->u_threshold) {
		pr_err("%s(): lower threshold is higher than upper threshold\n",
			__func__);
		return -ERANGE;
	}

	if (udest->tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
		if (udest->tun_port == 0) {
			pr_err("%s(): tunnel port is zero\n", __func__);
			return -EINVAL;
		}
	}

	ip_vs_addr_copy(udest->af, &daddr, &udest->addr);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
		return -ENOENT;
	}

	__ip_vs_update_dest(svc, dest, udest, 0);
	LeaveFunction(2);

	return 0;
}

/*
 *	Delete a destination (must be already unlinked from the service)
 *
 *	The dest is not freed here: it is moved to the per-netns trash
 *	list. @cleanup suppresses arming the trash timer and queueing the
 *	expire-nodest work.
 */
static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest,
			     bool cleanup)
{
	ip_vs_stop_estimator(ipvs, &dest->stats);

	/*
	 *  Remove it from the d-linked list with the real services.
	 */
	ip_vs_rs_unhash(dest);

	spin_lock_bh(&ipvs->dest_trash_lock);
	IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n",
		      IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port),
		      refcount_read(&dest->refcnt));
	/* first entry in an empty trash: start the expiry timer */
	if (list_empty(&ipvs->dest_trash) && !cleanup)
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	/* dest lives in trash with reference */
	list_add(&dest->t_list, &ipvs->dest_trash);
	/* 0 == idle period not started yet, see ip_vs_dest_trash_expire() */
	dest->idle_start = 0;
	spin_unlock_bh(&ipvs->dest_trash_lock);

	/* Queue up delayed work to expire all no destination connections.
	 * No-op when CONFIG_SYSCTL is disabled.
	 */
	if (!cleanup)
		ip_vs_enqueue_expire_nodest_conns(ipvs);
}


/*
 *	Unlink a destination from the given service
 *
 *	@svcupd: when non-zero, the scheduler's del_dest hook (if any) is
 *	called for the removed dest.
 */
static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
				struct ip_vs_dest *dest,
				int svcupd)
{
	dest->flags &= ~IP_VS_DEST_F_AVAILABLE;

	/*
	 *  Remove it from the d-linked destination list.
	 */
	list_del_rcu(&dest->n_list);
	svc->num_dests--;

	if (dest->af != svc->af)
		svc->ipvs->mixed_address_family_dests--;

	if (svcupd) {
		struct ip_vs_scheduler *sched;

		sched = rcu_dereference_protected(svc->scheduler, 1);
		if (sched && sched->del_dest)
			sched->del_dest(svc, dest);
	}
}


/*
 *	Delete a destination server in the given service
 *
 *	Returns -ENOENT when the dest is not linked to the service,
 *	0 otherwise.
 */
static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
	struct ip_vs_dest *dest;
	__be16 dport = udest->port;

	EnterFunction(2);

	/* We use function that requires RCU lock */
	rcu_read_lock();
	dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport);
	rcu_read_unlock();

	if (dest == NULL) {
		IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
		return -ENOENT;
	}

	/*
	 *	Unlink dest from the service
	 */
	__ip_vs_unlink_dest(svc, dest, 1);

	/*
	 *	Delete the destination
	 */
	__ip_vs_del_dest(svc->ipvs, dest, false);

	LeaveFunction(2);

	return 0;
}

/*
 * Trash timer: free trashed dests whose refcnt is not above 1 and that
 * have stayed that way for IP_VS_DEST_TRASH_PERIOD. The timer re-arms
 * itself while the trash is not empty.
 */
static void ip_vs_dest_trash_expire(struct timer_list *t)
{
	struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer);
	struct ip_vs_dest *dest, *next;
	unsigned long now = jiffies;

	spin_lock(&ipvs->dest_trash_lock);
	list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) {
		/* still referenced beyond the trash's own reference */
		if (refcount_read(&dest->refcnt) > 1)
			continue;
		if (dest->idle_start) {
			if (time_before(now, dest->idle_start +
					     IP_VS_DEST_TRASH_PERIOD))
				continue;
		} else {
			/* start the idle period; never store 0 ("not started") */
			dest->idle_start = max(1UL, now);
			continue;
		}
		IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n",
			      dest->vfwmark,
			      IP_VS_DBG_ADDR(dest->af, &dest->addr),
			      ntohs(dest->port));
		list_del(&dest->t_list);
		ip_vs_dest_free(dest);
	}
	if (!list_empty(&ipvs->dest_trash))
		mod_timer(&ipvs->dest_trash_timer,
			  jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1));
	spin_unlock(&ipvs->dest_trash_lock);
}

/*
 *	Add a service into the service hash table
 */
static int
ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u,
		  struct ip_vs_service **svc_p)
{
	int ret = 0, i;
	struct ip_vs_scheduler *sched = NULL;
	struct ip_vs_pe *pe = NULL;
	struct ip_vs_service *svc = NULL;
	int ret_hooks = -1;

	/* increase the module use count */
	if (!ip_vs_use_count_inc())
		return -ENOPROTOOPT;

	/* Lookup the scheduler by 'u->sched_name' */
	if (strcmp(u->sched_name, "none")) {
		sched = ip_vs_scheduler_get(u->sched_name);
		if (!sched) {
			pr_info("Scheduler module ip_vs_%s not found\n",
				u->sched_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

	if (u->pe_name && *u->pe_name) {
		pe = ip_vs_pe_getbyname(u->pe_name);
		if (pe == NULL) {
			pr_info("persistence engine module ip_vs_pe_%s "
				"not found\n", u->pe_name);
			ret = -ENOENT;
			goto out_err;
		}
	}

#ifdef CONFIG_IP_VS_IPV6
	if (u->af == AF_INET6) {
		__u32 plen = (__force __u32) u->netmask;

		if (plen < 1 || plen > 128) {
			ret = -EINVAL;
			goto out_err;
		}

		ret = nf_defrag_ipv6_enable(ipvs->net);
		if (ret)
			goto out_err;
	}
#endif

	if ((u->af == AF_INET && !ipvs->num_services) ||
	    (u->af == AF_INET6 && !ipvs->num_services6)) {
		ret = ip_vs_register_hooks(ipvs, u->af);
		if (ret < 0)
			goto out_err;
		ret_hooks = ret;
	}

	svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
	if (svc == NULL) {
		IP_VS_DBG(1, "%s(): no memory\n", __func__);
		ret = -ENOMEM;
		goto out_err;
	}
	svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
	if (!svc->stats.cpustats) {
1351 ret = -ENOMEM; 1352 goto out_err; 1353 } 1354 1355 for_each_possible_cpu(i) { 1356 struct ip_vs_cpu_stats *ip_vs_stats; 1357 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); 1358 u64_stats_init(&ip_vs_stats->syncp); 1359 } 1360 1361 1362 /* I'm the first user of the service */ 1363 atomic_set(&svc->refcnt, 0); 1364 1365 svc->af = u->af; 1366 svc->protocol = u->protocol; 1367 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); 1368 svc->port = u->port; 1369 svc->fwmark = u->fwmark; 1370 svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; 1371 svc->timeout = u->timeout * HZ; 1372 svc->netmask = u->netmask; 1373 svc->ipvs = ipvs; 1374 1375 INIT_LIST_HEAD(&svc->destinations); 1376 spin_lock_init(&svc->sched_lock); 1377 spin_lock_init(&svc->stats.lock); 1378 1379 /* Bind the scheduler */ 1380 if (sched) { 1381 ret = ip_vs_bind_scheduler(svc, sched); 1382 if (ret) 1383 goto out_err; 1384 sched = NULL; 1385 } 1386 1387 /* Bind the ct retriever */ 1388 RCU_INIT_POINTER(svc->pe, pe); 1389 pe = NULL; 1390 1391 /* Update the virtual service counters */ 1392 if (svc->port == FTPPORT) 1393 atomic_inc(&ipvs->ftpsvc_counter); 1394 else if (svc->port == 0) 1395 atomic_inc(&ipvs->nullsvc_counter); 1396 if (svc->pe && svc->pe->conn_out) 1397 atomic_inc(&ipvs->conn_out_counter); 1398 1399 ip_vs_start_estimator(ipvs, &svc->stats); 1400 1401 /* Count only IPv4 services for old get/setsockopt interface */ 1402 if (svc->af == AF_INET) 1403 ipvs->num_services++; 1404 else if (svc->af == AF_INET6) 1405 ipvs->num_services6++; 1406 1407 /* Hash the service into the service table */ 1408 ip_vs_svc_hash(svc); 1409 1410 *svc_p = svc; 1411 /* Now there is a service - full throttle */ 1412 ipvs->enable = 1; 1413 return 0; 1414 1415 1416 out_err: 1417 if (ret_hooks >= 0) 1418 ip_vs_unregister_hooks(ipvs, u->af); 1419 if (svc != NULL) { 1420 ip_vs_unbind_scheduler(svc, sched); 1421 ip_vs_service_free(svc); 1422 } 1423 ip_vs_scheduler_put(sched); 1424 ip_vs_pe_put(pe); 1425 1426 /* decrease the module 
use count */ 1427 ip_vs_use_count_dec(); 1428 1429 return ret; 1430 } 1431 1432 1433 /* 1434 * Edit a service and bind it with a new scheduler 1435 */ 1436 static int 1437 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1438 { 1439 struct ip_vs_scheduler *sched = NULL, *old_sched; 1440 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1441 int ret = 0; 1442 bool new_pe_conn_out, old_pe_conn_out; 1443 1444 /* 1445 * Lookup the scheduler, by 'u->sched_name' 1446 */ 1447 if (strcmp(u->sched_name, "none")) { 1448 sched = ip_vs_scheduler_get(u->sched_name); 1449 if (!sched) { 1450 pr_info("Scheduler module ip_vs_%s not found\n", 1451 u->sched_name); 1452 return -ENOENT; 1453 } 1454 } 1455 old_sched = sched; 1456 1457 if (u->pe_name && *u->pe_name) { 1458 pe = ip_vs_pe_getbyname(u->pe_name); 1459 if (pe == NULL) { 1460 pr_info("persistence engine module ip_vs_pe_%s " 1461 "not found\n", u->pe_name); 1462 ret = -ENOENT; 1463 goto out; 1464 } 1465 old_pe = pe; 1466 } 1467 1468 #ifdef CONFIG_IP_VS_IPV6 1469 if (u->af == AF_INET6) { 1470 __u32 plen = (__force __u32) u->netmask; 1471 1472 if (plen < 1 || plen > 128) { 1473 ret = -EINVAL; 1474 goto out; 1475 } 1476 } 1477 #endif 1478 1479 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1480 if (sched != old_sched) { 1481 if (old_sched) { 1482 ip_vs_unbind_scheduler(svc, old_sched); 1483 RCU_INIT_POINTER(svc->scheduler, NULL); 1484 /* Wait all svc->sched_data users */ 1485 synchronize_rcu(); 1486 } 1487 /* Bind the new scheduler */ 1488 if (sched) { 1489 ret = ip_vs_bind_scheduler(svc, sched); 1490 if (ret) { 1491 ip_vs_scheduler_put(sched); 1492 goto out; 1493 } 1494 } 1495 } 1496 1497 /* 1498 * Set the flags and timeout value 1499 */ 1500 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1501 svc->timeout = u->timeout * HZ; 1502 svc->netmask = u->netmask; 1503 1504 old_pe = rcu_dereference_protected(svc->pe, 1); 1505 if (pe != old_pe) { 1506 rcu_assign_pointer(svc->pe, pe); 1507 /* check for 
optional methods in new pe */ 1508 new_pe_conn_out = (pe && pe->conn_out) ? true : false; 1509 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; 1510 if (new_pe_conn_out && !old_pe_conn_out) 1511 atomic_inc(&svc->ipvs->conn_out_counter); 1512 if (old_pe_conn_out && !new_pe_conn_out) 1513 atomic_dec(&svc->ipvs->conn_out_counter); 1514 } 1515 1516 out: 1517 ip_vs_scheduler_put(old_sched); 1518 ip_vs_pe_put(old_pe); 1519 return ret; 1520 } 1521 1522 /* 1523 * Delete a service from the service list 1524 * - The service must be unlinked, unlocked and not referenced! 1525 * - We are called under _bh lock 1526 */ 1527 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) 1528 { 1529 struct ip_vs_dest *dest, *nxt; 1530 struct ip_vs_scheduler *old_sched; 1531 struct ip_vs_pe *old_pe; 1532 struct netns_ipvs *ipvs = svc->ipvs; 1533 1534 if (svc->af == AF_INET) { 1535 ipvs->num_services--; 1536 if (!ipvs->num_services) 1537 ip_vs_unregister_hooks(ipvs, svc->af); 1538 } else if (svc->af == AF_INET6) { 1539 ipvs->num_services6--; 1540 if (!ipvs->num_services6) 1541 ip_vs_unregister_hooks(ipvs, svc->af); 1542 } 1543 1544 ip_vs_stop_estimator(svc->ipvs, &svc->stats); 1545 1546 /* Unbind scheduler */ 1547 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1548 ip_vs_unbind_scheduler(svc, old_sched); 1549 ip_vs_scheduler_put(old_sched); 1550 1551 /* Unbind persistence engine, keep svc->pe */ 1552 old_pe = rcu_dereference_protected(svc->pe, 1); 1553 if (old_pe && old_pe->conn_out) 1554 atomic_dec(&ipvs->conn_out_counter); 1555 ip_vs_pe_put(old_pe); 1556 1557 /* 1558 * Unlink the whole destination list 1559 */ 1560 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1561 __ip_vs_unlink_dest(svc, dest, 0); 1562 __ip_vs_del_dest(svc->ipvs, dest, cleanup); 1563 } 1564 1565 /* 1566 * Update the virtual service counters 1567 */ 1568 if (svc->port == FTPPORT) 1569 atomic_dec(&ipvs->ftpsvc_counter); 1570 else if (svc->port == 0) 1571 
atomic_dec(&ipvs->nullsvc_counter); 1572 1573 /* 1574 * Free the service if nobody refers to it 1575 */ 1576 __ip_vs_svc_put(svc, true); 1577 1578 /* decrease the module use count */ 1579 ip_vs_use_count_dec(); 1580 } 1581 1582 /* 1583 * Unlink a service from list and try to delete it if its refcnt reached 0 1584 */ 1585 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1586 { 1587 ip_vs_unregister_conntrack(svc); 1588 /* Hold svc to avoid double release from dest_trash */ 1589 atomic_inc(&svc->refcnt); 1590 /* 1591 * Unhash it from the service table 1592 */ 1593 ip_vs_svc_unhash(svc); 1594 1595 __ip_vs_del_service(svc, cleanup); 1596 } 1597 1598 /* 1599 * Delete a service from the service list 1600 */ 1601 static int ip_vs_del_service(struct ip_vs_service *svc) 1602 { 1603 if (svc == NULL) 1604 return -EEXIST; 1605 ip_vs_unlink_service(svc, false); 1606 1607 return 0; 1608 } 1609 1610 1611 /* 1612 * Flush all the virtual services 1613 */ 1614 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) 1615 { 1616 int idx; 1617 struct ip_vs_service *svc; 1618 struct hlist_node *n; 1619 1620 /* 1621 * Flush the service table hashed by <netns,protocol,addr,port> 1622 */ 1623 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1624 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], 1625 s_list) { 1626 if (svc->ipvs == ipvs) 1627 ip_vs_unlink_service(svc, cleanup); 1628 } 1629 } 1630 1631 /* 1632 * Flush the service table hashed by fwmark 1633 */ 1634 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1635 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], 1636 f_list) { 1637 if (svc->ipvs == ipvs) 1638 ip_vs_unlink_service(svc, cleanup); 1639 } 1640 } 1641 1642 return 0; 1643 } 1644 1645 /* 1646 * Delete service by {netns} in the service table. 
1647 * Called by __ip_vs_batch_cleanup() 1648 */ 1649 void ip_vs_service_nets_cleanup(struct list_head *net_list) 1650 { 1651 struct netns_ipvs *ipvs; 1652 struct net *net; 1653 1654 EnterFunction(2); 1655 /* Check for "full" addressed entries */ 1656 mutex_lock(&__ip_vs_mutex); 1657 list_for_each_entry(net, net_list, exit_list) { 1658 ipvs = net_ipvs(net); 1659 ip_vs_flush(ipvs, true); 1660 } 1661 mutex_unlock(&__ip_vs_mutex); 1662 LeaveFunction(2); 1663 } 1664 1665 /* Put all references for device (dst_cache) */ 1666 static inline void 1667 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) 1668 { 1669 struct ip_vs_dest_dst *dest_dst; 1670 1671 spin_lock_bh(&dest->dst_lock); 1672 dest_dst = rcu_dereference_protected(dest->dest_dst, 1); 1673 if (dest_dst && dest_dst->dst_cache->dev == dev) { 1674 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1675 dev->name, 1676 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1677 ntohs(dest->port), 1678 refcount_read(&dest->refcnt)); 1679 __ip_vs_dst_cache_reset(dest); 1680 } 1681 spin_unlock_bh(&dest->dst_lock); 1682 1683 } 1684 /* Netdev event receiver 1685 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1686 */ 1687 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1688 void *ptr) 1689 { 1690 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1691 struct net *net = dev_net(dev); 1692 struct netns_ipvs *ipvs = net_ipvs(net); 1693 struct ip_vs_service *svc; 1694 struct ip_vs_dest *dest; 1695 unsigned int idx; 1696 1697 if (event != NETDEV_DOWN || !ipvs) 1698 return NOTIFY_DONE; 1699 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1700 EnterFunction(2); 1701 mutex_lock(&__ip_vs_mutex); 1702 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1703 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1704 if (svc->ipvs == ipvs) { 1705 list_for_each_entry(dest, &svc->destinations, 1706 n_list) { 1707 ip_vs_forget_dev(dest, dev); 1708 } 1709 } 
1710 } 1711 1712 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1713 if (svc->ipvs == ipvs) { 1714 list_for_each_entry(dest, &svc->destinations, 1715 n_list) { 1716 ip_vs_forget_dev(dest, dev); 1717 } 1718 } 1719 1720 } 1721 } 1722 1723 spin_lock_bh(&ipvs->dest_trash_lock); 1724 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 1725 ip_vs_forget_dev(dest, dev); 1726 } 1727 spin_unlock_bh(&ipvs->dest_trash_lock); 1728 mutex_unlock(&__ip_vs_mutex); 1729 LeaveFunction(2); 1730 return NOTIFY_DONE; 1731 } 1732 1733 /* 1734 * Zero counters in a service or all services 1735 */ 1736 static int ip_vs_zero_service(struct ip_vs_service *svc) 1737 { 1738 struct ip_vs_dest *dest; 1739 1740 list_for_each_entry(dest, &svc->destinations, n_list) { 1741 ip_vs_zero_stats(&dest->stats); 1742 } 1743 ip_vs_zero_stats(&svc->stats); 1744 return 0; 1745 } 1746 1747 static int ip_vs_zero_all(struct netns_ipvs *ipvs) 1748 { 1749 int idx; 1750 struct ip_vs_service *svc; 1751 1752 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1753 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1754 if (svc->ipvs == ipvs) 1755 ip_vs_zero_service(svc); 1756 } 1757 } 1758 1759 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1760 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1761 if (svc->ipvs == ipvs) 1762 ip_vs_zero_service(svc); 1763 } 1764 } 1765 1766 ip_vs_zero_stats(&ipvs->tot_stats); 1767 return 0; 1768 } 1769 1770 #ifdef CONFIG_SYSCTL 1771 1772 static int three = 3; 1773 1774 static int 1775 proc_do_defense_mode(struct ctl_table *table, int write, 1776 void *buffer, size_t *lenp, loff_t *ppos) 1777 { 1778 struct netns_ipvs *ipvs = table->extra2; 1779 int *valp = table->data; 1780 int val = *valp; 1781 int rc; 1782 1783 struct ctl_table tmp = { 1784 .data = &val, 1785 .maxlen = sizeof(int), 1786 .mode = table->mode, 1787 }; 1788 1789 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1790 if (write && (*valp != val)) { 1791 if (val < 0 || val > 3) { 
1792 rc = -EINVAL; 1793 } else { 1794 *valp = val; 1795 update_defense_level(ipvs); 1796 } 1797 } 1798 return rc; 1799 } 1800 1801 static int 1802 proc_do_sync_threshold(struct ctl_table *table, int write, 1803 void *buffer, size_t *lenp, loff_t *ppos) 1804 { 1805 int *valp = table->data; 1806 int val[2]; 1807 int rc; 1808 struct ctl_table tmp = { 1809 .data = &val, 1810 .maxlen = table->maxlen, 1811 .mode = table->mode, 1812 }; 1813 1814 memcpy(val, valp, sizeof(val)); 1815 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1816 if (write) { 1817 if (val[0] < 0 || val[1] < 0 || 1818 (val[0] >= val[1] && val[1])) 1819 rc = -EINVAL; 1820 else 1821 memcpy(valp, val, sizeof(val)); 1822 } 1823 return rc; 1824 } 1825 1826 static int 1827 proc_do_sync_ports(struct ctl_table *table, int write, 1828 void *buffer, size_t *lenp, loff_t *ppos) 1829 { 1830 int *valp = table->data; 1831 int val = *valp; 1832 int rc; 1833 1834 struct ctl_table tmp = { 1835 .data = &val, 1836 .maxlen = sizeof(int), 1837 .mode = table->mode, 1838 }; 1839 1840 rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); 1841 if (write && (*valp != val)) { 1842 if (val < 1 || !is_power_of_2(val)) 1843 rc = -EINVAL; 1844 else 1845 *valp = val; 1846 } 1847 return rc; 1848 } 1849 1850 /* 1851 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1852 * Do not change order or insert new entries without 1853 * align with netns init in ip_vs_control_net_init() 1854 */ 1855 1856 static struct ctl_table vs_vars[] = { 1857 { 1858 .procname = "amemthresh", 1859 .maxlen = sizeof(int), 1860 .mode = 0644, 1861 .proc_handler = proc_dointvec, 1862 }, 1863 { 1864 .procname = "am_droprate", 1865 .maxlen = sizeof(int), 1866 .mode = 0644, 1867 .proc_handler = proc_dointvec, 1868 }, 1869 { 1870 .procname = "drop_entry", 1871 .maxlen = sizeof(int), 1872 .mode = 0644, 1873 .proc_handler = proc_do_defense_mode, 1874 }, 1875 { 1876 .procname = "drop_packet", 1877 .maxlen = sizeof(int), 1878 .mode = 0644, 1879 .proc_handler = 
proc_do_defense_mode, 1880 }, 1881 #ifdef CONFIG_IP_VS_NFCT 1882 { 1883 .procname = "conntrack", 1884 .maxlen = sizeof(int), 1885 .mode = 0644, 1886 .proc_handler = &proc_dointvec, 1887 }, 1888 #endif 1889 { 1890 .procname = "secure_tcp", 1891 .maxlen = sizeof(int), 1892 .mode = 0644, 1893 .proc_handler = proc_do_defense_mode, 1894 }, 1895 { 1896 .procname = "snat_reroute", 1897 .maxlen = sizeof(int), 1898 .mode = 0644, 1899 .proc_handler = &proc_dointvec, 1900 }, 1901 { 1902 .procname = "sync_version", 1903 .maxlen = sizeof(int), 1904 .mode = 0644, 1905 .proc_handler = proc_dointvec_minmax, 1906 .extra1 = SYSCTL_ZERO, 1907 .extra2 = SYSCTL_ONE, 1908 }, 1909 { 1910 .procname = "sync_ports", 1911 .maxlen = sizeof(int), 1912 .mode = 0644, 1913 .proc_handler = proc_do_sync_ports, 1914 }, 1915 { 1916 .procname = "sync_persist_mode", 1917 .maxlen = sizeof(int), 1918 .mode = 0644, 1919 .proc_handler = proc_dointvec, 1920 }, 1921 { 1922 .procname = "sync_qlen_max", 1923 .maxlen = sizeof(unsigned long), 1924 .mode = 0644, 1925 .proc_handler = proc_doulongvec_minmax, 1926 }, 1927 { 1928 .procname = "sync_sock_size", 1929 .maxlen = sizeof(int), 1930 .mode = 0644, 1931 .proc_handler = proc_dointvec, 1932 }, 1933 { 1934 .procname = "cache_bypass", 1935 .maxlen = sizeof(int), 1936 .mode = 0644, 1937 .proc_handler = proc_dointvec, 1938 }, 1939 { 1940 .procname = "expire_nodest_conn", 1941 .maxlen = sizeof(int), 1942 .mode = 0644, 1943 .proc_handler = proc_dointvec, 1944 }, 1945 { 1946 .procname = "sloppy_tcp", 1947 .maxlen = sizeof(int), 1948 .mode = 0644, 1949 .proc_handler = proc_dointvec, 1950 }, 1951 { 1952 .procname = "sloppy_sctp", 1953 .maxlen = sizeof(int), 1954 .mode = 0644, 1955 .proc_handler = proc_dointvec, 1956 }, 1957 { 1958 .procname = "expire_quiescent_template", 1959 .maxlen = sizeof(int), 1960 .mode = 0644, 1961 .proc_handler = proc_dointvec, 1962 }, 1963 { 1964 .procname = "sync_threshold", 1965 .maxlen = 1966 sizeof(((struct netns_ipvs 
*)0)->sysctl_sync_threshold), 1967 .mode = 0644, 1968 .proc_handler = proc_do_sync_threshold, 1969 }, 1970 { 1971 .procname = "sync_refresh_period", 1972 .maxlen = sizeof(int), 1973 .mode = 0644, 1974 .proc_handler = proc_dointvec_jiffies, 1975 }, 1976 { 1977 .procname = "sync_retries", 1978 .maxlen = sizeof(int), 1979 .mode = 0644, 1980 .proc_handler = proc_dointvec_minmax, 1981 .extra1 = SYSCTL_ZERO, 1982 .extra2 = &three, 1983 }, 1984 { 1985 .procname = "nat_icmp_send", 1986 .maxlen = sizeof(int), 1987 .mode = 0644, 1988 .proc_handler = proc_dointvec, 1989 }, 1990 { 1991 .procname = "pmtu_disc", 1992 .maxlen = sizeof(int), 1993 .mode = 0644, 1994 .proc_handler = proc_dointvec, 1995 }, 1996 { 1997 .procname = "backup_only", 1998 .maxlen = sizeof(int), 1999 .mode = 0644, 2000 .proc_handler = proc_dointvec, 2001 }, 2002 { 2003 .procname = "conn_reuse_mode", 2004 .maxlen = sizeof(int), 2005 .mode = 0644, 2006 .proc_handler = proc_dointvec, 2007 }, 2008 { 2009 .procname = "schedule_icmp", 2010 .maxlen = sizeof(int), 2011 .mode = 0644, 2012 .proc_handler = proc_dointvec, 2013 }, 2014 { 2015 .procname = "ignore_tunneled", 2016 .maxlen = sizeof(int), 2017 .mode = 0644, 2018 .proc_handler = proc_dointvec, 2019 }, 2020 #ifdef CONFIG_IP_VS_DEBUG 2021 { 2022 .procname = "debug_level", 2023 .data = &sysctl_ip_vs_debug_level, 2024 .maxlen = sizeof(int), 2025 .mode = 0644, 2026 .proc_handler = proc_dointvec, 2027 }, 2028 #endif 2029 { } 2030 }; 2031 2032 #endif 2033 2034 #ifdef CONFIG_PROC_FS 2035 2036 struct ip_vs_iter { 2037 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 2038 struct hlist_head *table; 2039 int bucket; 2040 }; 2041 2042 /* 2043 * Write the contents of the VS rule table to a PROCfs file. 
2044 * (It is kept just for backward compatibility) 2045 */ 2046 static inline const char *ip_vs_fwd_name(unsigned int flags) 2047 { 2048 switch (flags & IP_VS_CONN_F_FWD_MASK) { 2049 case IP_VS_CONN_F_LOCALNODE: 2050 return "Local"; 2051 case IP_VS_CONN_F_TUNNEL: 2052 return "Tunnel"; 2053 case IP_VS_CONN_F_DROUTE: 2054 return "Route"; 2055 default: 2056 return "Masq"; 2057 } 2058 } 2059 2060 2061 /* Get the Nth entry in the two lists */ 2062 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 2063 { 2064 struct net *net = seq_file_net(seq); 2065 struct netns_ipvs *ipvs = net_ipvs(net); 2066 struct ip_vs_iter *iter = seq->private; 2067 int idx; 2068 struct ip_vs_service *svc; 2069 2070 /* look in hash by protocol */ 2071 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2072 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 2073 if ((svc->ipvs == ipvs) && pos-- == 0) { 2074 iter->table = ip_vs_svc_table; 2075 iter->bucket = idx; 2076 return svc; 2077 } 2078 } 2079 } 2080 2081 /* keep looking in fwmark */ 2082 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2083 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 2084 f_list) { 2085 if ((svc->ipvs == ipvs) && pos-- == 0) { 2086 iter->table = ip_vs_svc_fwm_table; 2087 iter->bucket = idx; 2088 return svc; 2089 } 2090 } 2091 } 2092 2093 return NULL; 2094 } 2095 2096 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 2097 __acquires(RCU) 2098 { 2099 rcu_read_lock(); 2100 return *pos ? 
ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 2101 } 2102 2103 2104 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 2105 { 2106 struct hlist_node *e; 2107 struct ip_vs_iter *iter; 2108 struct ip_vs_service *svc; 2109 2110 ++*pos; 2111 if (v == SEQ_START_TOKEN) 2112 return ip_vs_info_array(seq,0); 2113 2114 svc = v; 2115 iter = seq->private; 2116 2117 if (iter->table == ip_vs_svc_table) { 2118 /* next service in table hashed by protocol */ 2119 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 2120 if (e) 2121 return hlist_entry(e, struct ip_vs_service, s_list); 2122 2123 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2124 hlist_for_each_entry_rcu(svc, 2125 &ip_vs_svc_table[iter->bucket], 2126 s_list) { 2127 return svc; 2128 } 2129 } 2130 2131 iter->table = ip_vs_svc_fwm_table; 2132 iter->bucket = -1; 2133 goto scan_fwmark; 2134 } 2135 2136 /* next service in hashed by fwmark */ 2137 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 2138 if (e) 2139 return hlist_entry(e, struct ip_vs_service, f_list); 2140 2141 scan_fwmark: 2142 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2143 hlist_for_each_entry_rcu(svc, 2144 &ip_vs_svc_fwm_table[iter->bucket], 2145 f_list) 2146 return svc; 2147 } 2148 2149 return NULL; 2150 } 2151 2152 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2153 __releases(RCU) 2154 { 2155 rcu_read_unlock(); 2156 } 2157 2158 2159 static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 2160 { 2161 if (v == SEQ_START_TOKEN) { 2162 seq_printf(seq, 2163 "IP Virtual Server version %d.%d.%d (size=%d)\n", 2164 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2165 seq_puts(seq, 2166 "Prot LocalAddress:Port Scheduler Flags\n"); 2167 seq_puts(seq, 2168 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2169 } else { 2170 struct net *net = seq_file_net(seq); 2171 struct netns_ipvs *ipvs = net_ipvs(net); 2172 const struct ip_vs_service *svc = v; 2173 const struct ip_vs_iter *iter = 
seq->private; 2174 const struct ip_vs_dest *dest; 2175 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2176 char *sched_name = sched ? sched->name : "none"; 2177 2178 if (svc->ipvs != ipvs) 2179 return 0; 2180 if (iter->table == ip_vs_svc_table) { 2181 #ifdef CONFIG_IP_VS_IPV6 2182 if (svc->af == AF_INET6) 2183 seq_printf(seq, "%s [%pI6]:%04X %s ", 2184 ip_vs_proto_name(svc->protocol), 2185 &svc->addr.in6, 2186 ntohs(svc->port), 2187 sched_name); 2188 else 2189 #endif 2190 seq_printf(seq, "%s %08X:%04X %s %s ", 2191 ip_vs_proto_name(svc->protocol), 2192 ntohl(svc->addr.ip), 2193 ntohs(svc->port), 2194 sched_name, 2195 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2196 } else { 2197 seq_printf(seq, "FWM %08X %s %s", 2198 svc->fwmark, sched_name, 2199 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2200 } 2201 2202 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 2203 seq_printf(seq, "persistent %d %08X\n", 2204 svc->timeout, 2205 ntohl(svc->netmask)); 2206 else 2207 seq_putc(seq, '\n'); 2208 2209 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 2210 #ifdef CONFIG_IP_VS_IPV6 2211 if (dest->af == AF_INET6) 2212 seq_printf(seq, 2213 " -> [%pI6]:%04X" 2214 " %-7s %-6d %-10d %-10d\n", 2215 &dest->addr.in6, 2216 ntohs(dest->port), 2217 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2218 atomic_read(&dest->weight), 2219 atomic_read(&dest->activeconns), 2220 atomic_read(&dest->inactconns)); 2221 else 2222 #endif 2223 seq_printf(seq, 2224 " -> %08X:%04X " 2225 "%-7s %-6d %-10d %-10d\n", 2226 ntohl(dest->addr.ip), 2227 ntohs(dest->port), 2228 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2229 atomic_read(&dest->weight), 2230 atomic_read(&dest->activeconns), 2231 atomic_read(&dest->inactconns)); 2232 2233 } 2234 } 2235 return 0; 2236 } 2237 2238 static const struct seq_operations ip_vs_info_seq_ops = { 2239 .start = ip_vs_info_seq_start, 2240 .next = ip_vs_info_seq_next, 2241 .stop = ip_vs_info_seq_stop, 2242 .show = ip_vs_info_seq_show, 2243 }; 2244 
2245 static int ip_vs_stats_show(struct seq_file *seq, void *v) 2246 { 2247 struct net *net = seq_file_single_net(seq); 2248 struct ip_vs_kstats show; 2249 2250 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2251 seq_puts(seq, 2252 " Total Incoming Outgoing Incoming Outgoing\n"); 2253 seq_puts(seq, 2254 " Conns Packets Packets Bytes Bytes\n"); 2255 2256 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2257 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", 2258 (unsigned long long)show.conns, 2259 (unsigned long long)show.inpkts, 2260 (unsigned long long)show.outpkts, 2261 (unsigned long long)show.inbytes, 2262 (unsigned long long)show.outbytes); 2263 2264 /* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ 2265 seq_puts(seq, 2266 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2267 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", 2268 (unsigned long long)show.cps, 2269 (unsigned long long)show.inpps, 2270 (unsigned long long)show.outpps, 2271 (unsigned long long)show.inbps, 2272 (unsigned long long)show.outbps); 2273 2274 return 0; 2275 } 2276 2277 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2278 { 2279 struct net *net = seq_file_single_net(seq); 2280 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2281 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2282 struct ip_vs_kstats kstats; 2283 int i; 2284 2285 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2286 seq_puts(seq, 2287 " Total Incoming Outgoing Incoming Outgoing\n"); 2288 seq_puts(seq, 2289 "CPU Conns Packets Packets Bytes Bytes\n"); 2290 2291 for_each_possible_cpu(i) { 2292 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2293 unsigned int start; 2294 u64 conns, inpkts, outpkts, inbytes, outbytes; 2295 2296 do { 2297 start = u64_stats_fetch_begin_irq(&u->syncp); 2298 conns = u->cnt.conns; 2299 inpkts = u->cnt.inpkts; 2300 outpkts = u->cnt.outpkts; 2301 inbytes = u->cnt.inbytes; 2302 outbytes = 
u->cnt.outbytes; 2303 } while (u64_stats_fetch_retry_irq(&u->syncp, start)); 2304 2305 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", 2306 i, (u64)conns, (u64)inpkts, 2307 (u64)outpkts, (u64)inbytes, 2308 (u64)outbytes); 2309 } 2310 2311 ip_vs_copy_stats(&kstats, tot_stats); 2312 2313 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", 2314 (unsigned long long)kstats.conns, 2315 (unsigned long long)kstats.inpkts, 2316 (unsigned long long)kstats.outpkts, 2317 (unsigned long long)kstats.inbytes, 2318 (unsigned long long)kstats.outbytes); 2319 2320 /* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2321 seq_puts(seq, 2322 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2323 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", 2324 kstats.cps, 2325 kstats.inpps, 2326 kstats.outpps, 2327 kstats.inbps, 2328 kstats.outbps); 2329 2330 return 0; 2331 } 2332 #endif 2333 2334 /* 2335 * Set timeout values for tcp tcpfin udp in the timeout_table. 2336 */ 2337 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2338 { 2339 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2340 struct ip_vs_proto_data *pd; 2341 #endif 2342 2343 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2344 u->tcp_timeout, 2345 u->tcp_fin_timeout, 2346 u->udp_timeout); 2347 2348 #ifdef CONFIG_IP_VS_PROTO_TCP 2349 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || 2350 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { 2351 return -EINVAL; 2352 } 2353 #endif 2354 2355 #ifdef CONFIG_IP_VS_PROTO_UDP 2356 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) 2357 return -EINVAL; 2358 #endif 2359 2360 #ifdef CONFIG_IP_VS_PROTO_TCP 2361 if (u->tcp_timeout) { 2362 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2363 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2364 = u->tcp_timeout * HZ; 2365 } 2366 2367 if (u->tcp_fin_timeout) { 2368 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2369 
pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2370 = u->tcp_fin_timeout * HZ; 2371 } 2372 #endif 2373 2374 #ifdef CONFIG_IP_VS_PROTO_UDP 2375 if (u->udp_timeout) { 2376 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2377 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2378 = u->udp_timeout * HZ; 2379 } 2380 #endif 2381 return 0; 2382 } 2383 2384 #define CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2385 2386 struct ip_vs_svcdest_user { 2387 struct ip_vs_service_user s; 2388 struct ip_vs_dest_user d; 2389 }; 2390 2391 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { 2392 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), 2393 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), 2394 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), 2395 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), 2396 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), 2397 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), 2398 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2399 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), 2400 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), 2401 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), 2402 }; 2403 2404 union ip_vs_set_arglen { 2405 struct ip_vs_service_user field_IP_VS_SO_SET_ADD; 2406 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; 2407 struct ip_vs_service_user field_IP_VS_SO_SET_DEL; 2408 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; 2409 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; 2410 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; 2411 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; 2412 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; 2413 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; 2414 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; 2415 }; 2416 2417 #define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) 2418 2419 static void 
ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2420 struct ip_vs_service_user *usvc_compat) 2421 { 2422 memset(usvc, 0, sizeof(*usvc)); 2423 2424 usvc->af = AF_INET; 2425 usvc->protocol = usvc_compat->protocol; 2426 usvc->addr.ip = usvc_compat->addr; 2427 usvc->port = usvc_compat->port; 2428 usvc->fwmark = usvc_compat->fwmark; 2429 2430 /* Deep copy of sched_name is not needed here */ 2431 usvc->sched_name = usvc_compat->sched_name; 2432 2433 usvc->flags = usvc_compat->flags; 2434 usvc->timeout = usvc_compat->timeout; 2435 usvc->netmask = usvc_compat->netmask; 2436 } 2437 2438 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2439 struct ip_vs_dest_user *udest_compat) 2440 { 2441 memset(udest, 0, sizeof(*udest)); 2442 2443 udest->addr.ip = udest_compat->addr; 2444 udest->port = udest_compat->port; 2445 udest->conn_flags = udest_compat->conn_flags; 2446 udest->weight = udest_compat->weight; 2447 udest->u_threshold = udest_compat->u_threshold; 2448 udest->l_threshold = udest_compat->l_threshold; 2449 udest->af = AF_INET; 2450 udest->tun_type = IP_VS_CONN_F_TUNNEL_TYPE_IPIP; 2451 } 2452 2453 static int 2454 do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) 2455 { 2456 struct net *net = sock_net(sk); 2457 int ret; 2458 unsigned char arg[MAX_SET_ARGLEN]; 2459 struct ip_vs_service_user *usvc_compat; 2460 struct ip_vs_service_user_kern usvc; 2461 struct ip_vs_service *svc; 2462 struct ip_vs_dest_user *udest_compat; 2463 struct ip_vs_dest_user_kern udest; 2464 struct netns_ipvs *ipvs = net_ipvs(net); 2465 2466 BUILD_BUG_ON(sizeof(arg) > 255); 2467 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2468 return -EPERM; 2469 2470 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2471 return -EINVAL; 2472 if (len != set_arglen[CMDID(cmd)]) { 2473 IP_VS_DBG(1, "set_ctl: len %u != %u\n", 2474 len, set_arglen[CMDID(cmd)]); 2475 return -EINVAL; 2476 } 2477 2478 if (copy_from_sockptr(arg, ptr, len) != 0) 2479 
return -EFAULT; 2480 2481 /* Handle daemons since they have another lock */ 2482 if (cmd == IP_VS_SO_SET_STARTDAEMON || 2483 cmd == IP_VS_SO_SET_STOPDAEMON) { 2484 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2485 2486 if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2487 struct ipvs_sync_daemon_cfg cfg; 2488 2489 memset(&cfg, 0, sizeof(cfg)); 2490 ret = -EINVAL; 2491 if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, 2492 sizeof(cfg.mcast_ifn)) <= 0) 2493 return ret; 2494 cfg.syncid = dm->syncid; 2495 ret = start_sync_thread(ipvs, &cfg, dm->state); 2496 } else { 2497 ret = stop_sync_thread(ipvs, dm->state); 2498 } 2499 return ret; 2500 } 2501 2502 mutex_lock(&__ip_vs_mutex); 2503 if (cmd == IP_VS_SO_SET_FLUSH) { 2504 /* Flush the virtual service */ 2505 ret = ip_vs_flush(ipvs, false); 2506 goto out_unlock; 2507 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2508 /* Set timeout values for (tcp tcpfin udp) */ 2509 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); 2510 goto out_unlock; 2511 } else if (!len) { 2512 /* No more commands with len == 0 below */ 2513 ret = -EINVAL; 2514 goto out_unlock; 2515 } 2516 2517 usvc_compat = (struct ip_vs_service_user *)arg; 2518 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2519 2520 /* We only use the new structs internally, so copy userspace compat 2521 * structs to extended internal versions */ 2522 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2523 ip_vs_copy_udest_compat(&udest, udest_compat); 2524 2525 if (cmd == IP_VS_SO_SET_ZERO) { 2526 /* if no service address is set, zero counters in all */ 2527 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2528 ret = ip_vs_zero_all(ipvs); 2529 goto out_unlock; 2530 } 2531 } 2532 2533 if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) && 2534 strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) == 2535 IP_VS_SCHEDNAME_MAXLEN) { 2536 ret = -EINVAL; 2537 goto out_unlock; 2538 } 2539 2540 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 
2541 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2542 usvc.protocol != IPPROTO_SCTP) { 2543 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", 2544 usvc.protocol, &usvc.addr.ip, 2545 ntohs(usvc.port)); 2546 ret = -EFAULT; 2547 goto out_unlock; 2548 } 2549 2550 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2551 rcu_read_lock(); 2552 if (usvc.fwmark == 0) 2553 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, 2554 &usvc.addr, usvc.port); 2555 else 2556 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); 2557 rcu_read_unlock(); 2558 2559 if (cmd != IP_VS_SO_SET_ADD 2560 && (svc == NULL || svc->protocol != usvc.protocol)) { 2561 ret = -ESRCH; 2562 goto out_unlock; 2563 } 2564 2565 switch (cmd) { 2566 case IP_VS_SO_SET_ADD: 2567 if (svc != NULL) 2568 ret = -EEXIST; 2569 else 2570 ret = ip_vs_add_service(ipvs, &usvc, &svc); 2571 break; 2572 case IP_VS_SO_SET_EDIT: 2573 ret = ip_vs_edit_service(svc, &usvc); 2574 break; 2575 case IP_VS_SO_SET_DEL: 2576 ret = ip_vs_del_service(svc); 2577 if (!ret) 2578 goto out_unlock; 2579 break; 2580 case IP_VS_SO_SET_ZERO: 2581 ret = ip_vs_zero_service(svc); 2582 break; 2583 case IP_VS_SO_SET_ADDDEST: 2584 ret = ip_vs_add_dest(svc, &udest); 2585 break; 2586 case IP_VS_SO_SET_EDITDEST: 2587 ret = ip_vs_edit_dest(svc, &udest); 2588 break; 2589 case IP_VS_SO_SET_DELDEST: 2590 ret = ip_vs_del_dest(svc, &udest); 2591 } 2592 2593 out_unlock: 2594 mutex_unlock(&__ip_vs_mutex); 2595 return ret; 2596 } 2597 2598 2599 static void 2600 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2601 { 2602 struct ip_vs_scheduler *sched; 2603 struct ip_vs_kstats kstats; 2604 char *sched_name; 2605 2606 sched = rcu_dereference_protected(src->scheduler, 1); 2607 sched_name = sched ? 
sched->name : "none"; 2608 dst->protocol = src->protocol; 2609 dst->addr = src->addr.ip; 2610 dst->port = src->port; 2611 dst->fwmark = src->fwmark; 2612 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2613 dst->flags = src->flags; 2614 dst->timeout = src->timeout / HZ; 2615 dst->netmask = src->netmask; 2616 dst->num_dests = src->num_dests; 2617 ip_vs_copy_stats(&kstats, &src->stats); 2618 ip_vs_export_stats_user(&dst->stats, &kstats); 2619 } 2620 2621 static inline int 2622 __ip_vs_get_service_entries(struct netns_ipvs *ipvs, 2623 const struct ip_vs_get_services *get, 2624 struct ip_vs_get_services __user *uptr) 2625 { 2626 int idx, count=0; 2627 struct ip_vs_service *svc; 2628 struct ip_vs_service_entry entry; 2629 int ret = 0; 2630 2631 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2632 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2633 /* Only expose IPv4 entries to old interface */ 2634 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2635 continue; 2636 2637 if (count >= get->num_services) 2638 goto out; 2639 memset(&entry, 0, sizeof(entry)); 2640 ip_vs_copy_service(&entry, svc); 2641 if (copy_to_user(&uptr->entrytable[count], 2642 &entry, sizeof(entry))) { 2643 ret = -EFAULT; 2644 goto out; 2645 } 2646 count++; 2647 } 2648 } 2649 2650 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2651 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2652 /* Only expose IPv4 entries to old interface */ 2653 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2654 continue; 2655 2656 if (count >= get->num_services) 2657 goto out; 2658 memset(&entry, 0, sizeof(entry)); 2659 ip_vs_copy_service(&entry, svc); 2660 if (copy_to_user(&uptr->entrytable[count], 2661 &entry, sizeof(entry))) { 2662 ret = -EFAULT; 2663 goto out; 2664 } 2665 count++; 2666 } 2667 } 2668 out: 2669 return ret; 2670 } 2671 2672 static inline int 2673 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, 2674 struct ip_vs_get_dests __user 
*uptr) 2675 { 2676 struct ip_vs_service *svc; 2677 union nf_inet_addr addr = { .ip = get->addr }; 2678 int ret = 0; 2679 2680 rcu_read_lock(); 2681 if (get->fwmark) 2682 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); 2683 else 2684 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, 2685 get->port); 2686 rcu_read_unlock(); 2687 2688 if (svc) { 2689 int count = 0; 2690 struct ip_vs_dest *dest; 2691 struct ip_vs_dest_entry entry; 2692 struct ip_vs_kstats kstats; 2693 2694 memset(&entry, 0, sizeof(entry)); 2695 list_for_each_entry(dest, &svc->destinations, n_list) { 2696 if (count >= get->num_dests) 2697 break; 2698 2699 /* Cannot expose heterogeneous members via sockopt 2700 * interface 2701 */ 2702 if (dest->af != svc->af) 2703 continue; 2704 2705 entry.addr = dest->addr.ip; 2706 entry.port = dest->port; 2707 entry.conn_flags = atomic_read(&dest->conn_flags); 2708 entry.weight = atomic_read(&dest->weight); 2709 entry.u_threshold = dest->u_threshold; 2710 entry.l_threshold = dest->l_threshold; 2711 entry.activeconns = atomic_read(&dest->activeconns); 2712 entry.inactconns = atomic_read(&dest->inactconns); 2713 entry.persistconns = atomic_read(&dest->persistconns); 2714 ip_vs_copy_stats(&kstats, &dest->stats); 2715 ip_vs_export_stats_user(&entry.stats, &kstats); 2716 if (copy_to_user(&uptr->entrytable[count], 2717 &entry, sizeof(entry))) { 2718 ret = -EFAULT; 2719 break; 2720 } 2721 count++; 2722 } 2723 } else 2724 ret = -ESRCH; 2725 return ret; 2726 } 2727 2728 static inline void 2729 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2730 { 2731 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2732 struct ip_vs_proto_data *pd; 2733 #endif 2734 2735 memset(u, 0, sizeof (*u)); 2736 2737 #ifdef CONFIG_IP_VS_PROTO_TCP 2738 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2739 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2740 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 
/ HZ; 2741 #endif 2742 #ifdef CONFIG_IP_VS_PROTO_UDP 2743 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2744 u->udp_timeout = 2745 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2746 #endif 2747 } 2748 2749 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { 2750 [CMDID(IP_VS_SO_GET_VERSION)] = 64, 2751 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), 2752 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), 2753 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), 2754 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), 2755 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2756 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), 2757 }; 2758 2759 union ip_vs_get_arglen { 2760 char field_IP_VS_SO_GET_VERSION[64]; 2761 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; 2762 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; 2763 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; 2764 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; 2765 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; 2766 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; 2767 }; 2768 2769 #define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) 2770 2771 static int 2772 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2773 { 2774 unsigned char arg[MAX_GET_ARGLEN]; 2775 int ret = 0; 2776 unsigned int copylen; 2777 struct net *net = sock_net(sk); 2778 struct netns_ipvs *ipvs = net_ipvs(net); 2779 2780 BUG_ON(!net); 2781 BUILD_BUG_ON(sizeof(arg) > 255); 2782 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2783 return -EPERM; 2784 2785 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2786 return -EINVAL; 2787 2788 copylen = get_arglen[CMDID(cmd)]; 2789 if (*len < (int) copylen) { 2790 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); 2791 return -EINVAL; 2792 } 2793 2794 if (copy_from_user(arg, user, copylen) != 0) 2795 return -EFAULT; 2796 /* 2797 * 
Handle daemons first since it has its own locking 2798 */ 2799 if (cmd == IP_VS_SO_GET_DAEMON) { 2800 struct ip_vs_daemon_user d[2]; 2801 2802 memset(&d, 0, sizeof(d)); 2803 mutex_lock(&ipvs->sync_mutex); 2804 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2805 d[0].state = IP_VS_STATE_MASTER; 2806 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, 2807 sizeof(d[0].mcast_ifn)); 2808 d[0].syncid = ipvs->mcfg.syncid; 2809 } 2810 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2811 d[1].state = IP_VS_STATE_BACKUP; 2812 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, 2813 sizeof(d[1].mcast_ifn)); 2814 d[1].syncid = ipvs->bcfg.syncid; 2815 } 2816 if (copy_to_user(user, &d, sizeof(d)) != 0) 2817 ret = -EFAULT; 2818 mutex_unlock(&ipvs->sync_mutex); 2819 return ret; 2820 } 2821 2822 mutex_lock(&__ip_vs_mutex); 2823 switch (cmd) { 2824 case IP_VS_SO_GET_VERSION: 2825 { 2826 char buf[64]; 2827 2828 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2829 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2830 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2831 ret = -EFAULT; 2832 goto out; 2833 } 2834 *len = strlen(buf)+1; 2835 } 2836 break; 2837 2838 case IP_VS_SO_GET_INFO: 2839 { 2840 struct ip_vs_getinfo info; 2841 info.version = IP_VS_VERSION_CODE; 2842 info.size = ip_vs_conn_tab_size; 2843 info.num_services = ipvs->num_services; 2844 if (copy_to_user(user, &info, sizeof(info)) != 0) 2845 ret = -EFAULT; 2846 } 2847 break; 2848 2849 case IP_VS_SO_GET_SERVICES: 2850 { 2851 struct ip_vs_get_services *get; 2852 int size; 2853 2854 get = (struct ip_vs_get_services *)arg; 2855 size = struct_size(get, entrytable, get->num_services); 2856 if (*len != size) { 2857 pr_err("length: %u != %u\n", *len, size); 2858 ret = -EINVAL; 2859 goto out; 2860 } 2861 ret = __ip_vs_get_service_entries(ipvs, get, user); 2862 } 2863 break; 2864 2865 case IP_VS_SO_GET_SERVICE: 2866 { 2867 struct ip_vs_service_entry *entry; 2868 struct ip_vs_service *svc; 2869 union nf_inet_addr addr; 2870 2871 
entry = (struct ip_vs_service_entry *)arg; 2872 addr.ip = entry->addr; 2873 rcu_read_lock(); 2874 if (entry->fwmark) 2875 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); 2876 else 2877 svc = __ip_vs_service_find(ipvs, AF_INET, 2878 entry->protocol, &addr, 2879 entry->port); 2880 rcu_read_unlock(); 2881 if (svc) { 2882 ip_vs_copy_service(entry, svc); 2883 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2884 ret = -EFAULT; 2885 } else 2886 ret = -ESRCH; 2887 } 2888 break; 2889 2890 case IP_VS_SO_GET_DESTS: 2891 { 2892 struct ip_vs_get_dests *get; 2893 int size; 2894 2895 get = (struct ip_vs_get_dests *)arg; 2896 size = struct_size(get, entrytable, get->num_dests); 2897 if (*len != size) { 2898 pr_err("length: %u != %u\n", *len, size); 2899 ret = -EINVAL; 2900 goto out; 2901 } 2902 ret = __ip_vs_get_dest_entries(ipvs, get, user); 2903 } 2904 break; 2905 2906 case IP_VS_SO_GET_TIMEOUT: 2907 { 2908 struct ip_vs_timeout_user t; 2909 2910 __ip_vs_get_timeouts(ipvs, &t); 2911 if (copy_to_user(user, &t, sizeof(t)) != 0) 2912 ret = -EFAULT; 2913 } 2914 break; 2915 2916 default: 2917 ret = -EINVAL; 2918 } 2919 2920 out: 2921 mutex_unlock(&__ip_vs_mutex); 2922 return ret; 2923 } 2924 2925 2926 static struct nf_sockopt_ops ip_vs_sockopts = { 2927 .pf = PF_INET, 2928 .set_optmin = IP_VS_BASE_CTL, 2929 .set_optmax = IP_VS_SO_SET_MAX+1, 2930 .set = do_ip_vs_set_ctl, 2931 .get_optmin = IP_VS_BASE_CTL, 2932 .get_optmax = IP_VS_SO_GET_MAX+1, 2933 .get = do_ip_vs_get_ctl, 2934 .owner = THIS_MODULE, 2935 }; 2936 2937 /* 2938 * Generic Netlink interface 2939 */ 2940 2941 /* IPVS genetlink family */ 2942 static struct genl_family ip_vs_genl_family; 2943 2944 /* Policy used for first-level command attributes */ 2945 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2946 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2947 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2948 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2949 
[IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2950 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2951 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2952 }; 2953 2954 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2955 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2956 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2957 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2958 .len = IP_VS_IFNAME_MAXLEN - 1 }, 2959 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2960 [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, 2961 [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, 2962 [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) }, 2963 [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 }, 2964 [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 }, 2965 }; 2966 2967 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2968 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2969 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2970 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2971 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2972 .len = sizeof(union nf_inet_addr) }, 2973 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2974 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2975 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2976 .len = IP_VS_SCHEDNAME_MAXLEN - 1 }, 2977 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, 2978 .len = IP_VS_PENAME_MAXLEN }, 2979 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2980 .len = sizeof(struct ip_vs_flags) }, 2981 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2982 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2983 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2984 }; 2985 2986 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2987 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2988 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2989 
.len = sizeof(union nf_inet_addr) }, 2990 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2991 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2992 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2993 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2994 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2995 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2996 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2997 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2998 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2999 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 3000 [IPVS_DEST_ATTR_TUN_TYPE] = { .type = NLA_U8 }, 3001 [IPVS_DEST_ATTR_TUN_PORT] = { .type = NLA_U16 }, 3002 [IPVS_DEST_ATTR_TUN_FLAGS] = { .type = NLA_U16 }, 3003 }; 3004 3005 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 3006 struct ip_vs_kstats *kstats) 3007 { 3008 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3009 3010 if (!nl_stats) 3011 return -EMSGSIZE; 3012 3013 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || 3014 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || 3015 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || 3016 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3017 IPVS_STATS_ATTR_PAD) || 3018 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3019 IPVS_STATS_ATTR_PAD) || 3020 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || 3021 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || 3022 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || 3023 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || 3024 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) 3025 goto nla_put_failure; 3026 nla_nest_end(skb, nl_stats); 3027 3028 return 0; 3029 3030 nla_put_failure: 3031 nla_nest_cancel(skb, nl_stats); 3032 return -EMSGSIZE; 3033 } 3034 3035 static int ip_vs_genl_fill_stats64(struct sk_buff 
*skb, int container_type, 3036 struct ip_vs_kstats *kstats) 3037 { 3038 struct nlattr *nl_stats = nla_nest_start_noflag(skb, container_type); 3039 3040 if (!nl_stats) 3041 return -EMSGSIZE; 3042 3043 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, 3044 IPVS_STATS_ATTR_PAD) || 3045 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, 3046 IPVS_STATS_ATTR_PAD) || 3047 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, 3048 IPVS_STATS_ATTR_PAD) || 3049 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 3050 IPVS_STATS_ATTR_PAD) || 3051 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 3052 IPVS_STATS_ATTR_PAD) || 3053 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, 3054 IPVS_STATS_ATTR_PAD) || 3055 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, 3056 IPVS_STATS_ATTR_PAD) || 3057 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, 3058 IPVS_STATS_ATTR_PAD) || 3059 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, 3060 IPVS_STATS_ATTR_PAD) || 3061 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, 3062 IPVS_STATS_ATTR_PAD)) 3063 goto nla_put_failure; 3064 nla_nest_end(skb, nl_stats); 3065 3066 return 0; 3067 3068 nla_put_failure: 3069 nla_nest_cancel(skb, nl_stats); 3070 return -EMSGSIZE; 3071 } 3072 3073 static int ip_vs_genl_fill_service(struct sk_buff *skb, 3074 struct ip_vs_service *svc) 3075 { 3076 struct ip_vs_scheduler *sched; 3077 struct ip_vs_pe *pe; 3078 struct nlattr *nl_service; 3079 struct ip_vs_flags flags = { .flags = svc->flags, 3080 .mask = ~0 }; 3081 struct ip_vs_kstats kstats; 3082 char *sched_name; 3083 3084 nl_service = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_SERVICE); 3085 if (!nl_service) 3086 return -EMSGSIZE; 3087 3088 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 3089 goto nla_put_failure; 3090 if (svc->fwmark) { 3091 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 3092 goto nla_put_failure; 3093 } else { 
3094 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 3095 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 3096 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 3097 goto nla_put_failure; 3098 } 3099 3100 sched = rcu_dereference_protected(svc->scheduler, 1); 3101 sched_name = sched ? sched->name : "none"; 3102 pe = rcu_dereference_protected(svc->pe, 1); 3103 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || 3104 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 3105 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 3106 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 3107 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 3108 goto nla_put_failure; 3109 ip_vs_copy_stats(&kstats, &svc->stats); 3110 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) 3111 goto nla_put_failure; 3112 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) 3113 goto nla_put_failure; 3114 3115 nla_nest_end(skb, nl_service); 3116 3117 return 0; 3118 3119 nla_put_failure: 3120 nla_nest_cancel(skb, nl_service); 3121 return -EMSGSIZE; 3122 } 3123 3124 static int ip_vs_genl_dump_service(struct sk_buff *skb, 3125 struct ip_vs_service *svc, 3126 struct netlink_callback *cb) 3127 { 3128 void *hdr; 3129 3130 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3131 &ip_vs_genl_family, NLM_F_MULTI, 3132 IPVS_CMD_NEW_SERVICE); 3133 if (!hdr) 3134 return -EMSGSIZE; 3135 3136 if (ip_vs_genl_fill_service(skb, svc) < 0) 3137 goto nla_put_failure; 3138 3139 genlmsg_end(skb, hdr); 3140 return 0; 3141 3142 nla_put_failure: 3143 genlmsg_cancel(skb, hdr); 3144 return -EMSGSIZE; 3145 } 3146 3147 static int ip_vs_genl_dump_services(struct sk_buff *skb, 3148 struct netlink_callback *cb) 3149 { 3150 int idx = 0, i; 3151 int start = cb->args[0]; 3152 struct ip_vs_service *svc; 3153 struct net *net = sock_net(skb->sk); 3154 struct netns_ipvs *ipvs = net_ipvs(net); 3155 3156 
mutex_lock(&__ip_vs_mutex); 3157 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3158 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 3159 if (++idx <= start || (svc->ipvs != ipvs)) 3160 continue; 3161 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3162 idx--; 3163 goto nla_put_failure; 3164 } 3165 } 3166 } 3167 3168 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3169 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 3170 if (++idx <= start || (svc->ipvs != ipvs)) 3171 continue; 3172 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3173 idx--; 3174 goto nla_put_failure; 3175 } 3176 } 3177 } 3178 3179 nla_put_failure: 3180 mutex_unlock(&__ip_vs_mutex); 3181 cb->args[0] = idx; 3182 3183 return skb->len; 3184 } 3185 3186 static bool ip_vs_is_af_valid(int af) 3187 { 3188 if (af == AF_INET) 3189 return true; 3190 #ifdef CONFIG_IP_VS_IPV6 3191 if (af == AF_INET6 && ipv6_mod_enabled()) 3192 return true; 3193 #endif 3194 return false; 3195 } 3196 3197 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, 3198 struct ip_vs_service_user_kern *usvc, 3199 struct nlattr *nla, bool full_entry, 3200 struct ip_vs_service **ret_svc) 3201 { 3202 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 3203 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 3204 struct ip_vs_service *svc; 3205 3206 /* Parse mandatory identifying service fields first */ 3207 if (nla == NULL || 3208 nla_parse_nested_deprecated(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy, NULL)) 3209 return -EINVAL; 3210 3211 nla_af = attrs[IPVS_SVC_ATTR_AF]; 3212 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 3213 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 3214 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 3215 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 3216 3217 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 3218 return -EINVAL; 3219 3220 memset(usvc, 0, sizeof(*usvc)); 3221 3222 usvc->af = nla_get_u16(nla_af); 3223 if (!ip_vs_is_af_valid(usvc->af)) 3224 return 
-EAFNOSUPPORT; 3225 3226 if (nla_fwmark) { 3227 usvc->protocol = IPPROTO_TCP; 3228 usvc->fwmark = nla_get_u32(nla_fwmark); 3229 } else { 3230 usvc->protocol = nla_get_u16(nla_protocol); 3231 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 3232 usvc->port = nla_get_be16(nla_port); 3233 usvc->fwmark = 0; 3234 } 3235 3236 rcu_read_lock(); 3237 if (usvc->fwmark) 3238 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); 3239 else 3240 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, 3241 &usvc->addr, usvc->port); 3242 rcu_read_unlock(); 3243 *ret_svc = svc; 3244 3245 /* If a full entry was requested, check for the additional fields */ 3246 if (full_entry) { 3247 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, 3248 *nla_netmask; 3249 struct ip_vs_flags flags; 3250 3251 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 3252 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; 3253 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 3254 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 3255 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 3256 3257 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 3258 return -EINVAL; 3259 3260 nla_memcpy(&flags, nla_flags, sizeof(flags)); 3261 3262 /* prefill flags from service if it already exists */ 3263 if (svc) 3264 usvc->flags = svc->flags; 3265 3266 /* set new flags from userland */ 3267 usvc->flags = (usvc->flags & ~flags.mask) | 3268 (flags.flags & flags.mask); 3269 usvc->sched_name = nla_data(nla_sched); 3270 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3271 usvc->timeout = nla_get_u32(nla_timeout); 3272 usvc->netmask = nla_get_be32(nla_netmask); 3273 } 3274 3275 return 0; 3276 } 3277 3278 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, 3279 struct nlattr *nla) 3280 { 3281 struct ip_vs_service_user_kern usvc; 3282 struct ip_vs_service *svc; 3283 int ret; 3284 3285 ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, false, &svc); 3286 return ret ? 
ERR_PTR(ret) : svc; 3287 } 3288 3289 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3290 { 3291 struct nlattr *nl_dest; 3292 struct ip_vs_kstats kstats; 3293 3294 nl_dest = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DEST); 3295 if (!nl_dest) 3296 return -EMSGSIZE; 3297 3298 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3299 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3300 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3301 (atomic_read(&dest->conn_flags) & 3302 IP_VS_CONN_F_FWD_MASK)) || 3303 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3304 atomic_read(&dest->weight)) || 3305 nla_put_u8(skb, IPVS_DEST_ATTR_TUN_TYPE, 3306 dest->tun_type) || 3307 nla_put_be16(skb, IPVS_DEST_ATTR_TUN_PORT, 3308 dest->tun_port) || 3309 nla_put_u16(skb, IPVS_DEST_ATTR_TUN_FLAGS, 3310 dest->tun_flags) || 3311 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3312 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3313 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3314 atomic_read(&dest->activeconns)) || 3315 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3316 atomic_read(&dest->inactconns)) || 3317 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3318 atomic_read(&dest->persistconns)) || 3319 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) 3320 goto nla_put_failure; 3321 ip_vs_copy_stats(&kstats, &dest->stats); 3322 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) 3323 goto nla_put_failure; 3324 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) 3325 goto nla_put_failure; 3326 3327 nla_nest_end(skb, nl_dest); 3328 3329 return 0; 3330 3331 nla_put_failure: 3332 nla_nest_cancel(skb, nl_dest); 3333 return -EMSGSIZE; 3334 } 3335 3336 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3337 struct netlink_callback *cb) 3338 { 3339 void *hdr; 3340 3341 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3342 &ip_vs_genl_family, 
NLM_F_MULTI, 3343 IPVS_CMD_NEW_DEST); 3344 if (!hdr) 3345 return -EMSGSIZE; 3346 3347 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3348 goto nla_put_failure; 3349 3350 genlmsg_end(skb, hdr); 3351 return 0; 3352 3353 nla_put_failure: 3354 genlmsg_cancel(skb, hdr); 3355 return -EMSGSIZE; 3356 } 3357 3358 static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3359 struct netlink_callback *cb) 3360 { 3361 int idx = 0; 3362 int start = cb->args[0]; 3363 struct ip_vs_service *svc; 3364 struct ip_vs_dest *dest; 3365 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3366 struct net *net = sock_net(skb->sk); 3367 struct netns_ipvs *ipvs = net_ipvs(net); 3368 3369 mutex_lock(&__ip_vs_mutex); 3370 3371 /* Try to find the service for which to dump destinations */ 3372 if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) 3373 goto out_err; 3374 3375 3376 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); 3377 if (IS_ERR_OR_NULL(svc)) 3378 goto out_err; 3379 3380 /* Dump the destinations */ 3381 list_for_each_entry(dest, &svc->destinations, n_list) { 3382 if (++idx <= start) 3383 continue; 3384 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3385 idx--; 3386 goto nla_put_failure; 3387 } 3388 } 3389 3390 nla_put_failure: 3391 cb->args[0] = idx; 3392 3393 out_err: 3394 mutex_unlock(&__ip_vs_mutex); 3395 3396 return skb->len; 3397 } 3398 3399 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3400 struct nlattr *nla, bool full_entry) 3401 { 3402 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3403 struct nlattr *nla_addr, *nla_port; 3404 struct nlattr *nla_addr_family; 3405 3406 /* Parse mandatory identifying destination fields first */ 3407 if (nla == NULL || 3408 nla_parse_nested_deprecated(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy, NULL)) 3409 return -EINVAL; 3410 3411 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3412 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3413 nla_addr_family = 
attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; 3414 3415 if (!(nla_addr && nla_port)) 3416 return -EINVAL; 3417 3418 memset(udest, 0, sizeof(*udest)); 3419 3420 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3421 udest->port = nla_get_be16(nla_port); 3422 3423 if (nla_addr_family) 3424 udest->af = nla_get_u16(nla_addr_family); 3425 else 3426 udest->af = 0; 3427 3428 /* If a full entry was requested, check for the additional fields */ 3429 if (full_entry) { 3430 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 3431 *nla_l_thresh, *nla_tun_type, *nla_tun_port, 3432 *nla_tun_flags; 3433 3434 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 3435 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 3436 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 3437 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 3438 nla_tun_type = attrs[IPVS_DEST_ATTR_TUN_TYPE]; 3439 nla_tun_port = attrs[IPVS_DEST_ATTR_TUN_PORT]; 3440 nla_tun_flags = attrs[IPVS_DEST_ATTR_TUN_FLAGS]; 3441 3442 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 3443 return -EINVAL; 3444 3445 udest->conn_flags = nla_get_u32(nla_fwd) 3446 & IP_VS_CONN_F_FWD_MASK; 3447 udest->weight = nla_get_u32(nla_weight); 3448 udest->u_threshold = nla_get_u32(nla_u_thresh); 3449 udest->l_threshold = nla_get_u32(nla_l_thresh); 3450 3451 if (nla_tun_type) 3452 udest->tun_type = nla_get_u8(nla_tun_type); 3453 3454 if (nla_tun_port) 3455 udest->tun_port = nla_get_be16(nla_tun_port); 3456 3457 if (nla_tun_flags) 3458 udest->tun_flags = nla_get_u16(nla_tun_flags); 3459 } 3460 3461 return 0; 3462 } 3463 3464 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, 3465 struct ipvs_sync_daemon_cfg *c) 3466 { 3467 struct nlattr *nl_daemon; 3468 3469 nl_daemon = nla_nest_start_noflag(skb, IPVS_CMD_ATTR_DAEMON); 3470 if (!nl_daemon) 3471 return -EMSGSIZE; 3472 3473 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || 3474 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) || 3475 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, 
c->syncid) || 3476 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || 3477 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || 3478 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) 3479 goto nla_put_failure; 3480 #ifdef CONFIG_IP_VS_IPV6 3481 if (c->mcast_af == AF_INET6) { 3482 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, 3483 &c->mcast_group.in6)) 3484 goto nla_put_failure; 3485 } else 3486 #endif 3487 if (c->mcast_af == AF_INET && 3488 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, 3489 c->mcast_group.ip)) 3490 goto nla_put_failure; 3491 nla_nest_end(skb, nl_daemon); 3492 3493 return 0; 3494 3495 nla_put_failure: 3496 nla_nest_cancel(skb, nl_daemon); 3497 return -EMSGSIZE; 3498 } 3499 3500 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3501 struct ipvs_sync_daemon_cfg *c, 3502 struct netlink_callback *cb) 3503 { 3504 void *hdr; 3505 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3506 &ip_vs_genl_family, NLM_F_MULTI, 3507 IPVS_CMD_NEW_DAEMON); 3508 if (!hdr) 3509 return -EMSGSIZE; 3510 3511 if (ip_vs_genl_fill_daemon(skb, state, c)) 3512 goto nla_put_failure; 3513 3514 genlmsg_end(skb, hdr); 3515 return 0; 3516 3517 nla_put_failure: 3518 genlmsg_cancel(skb, hdr); 3519 return -EMSGSIZE; 3520 } 3521 3522 static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3523 struct netlink_callback *cb) 3524 { 3525 struct net *net = sock_net(skb->sk); 3526 struct netns_ipvs *ipvs = net_ipvs(net); 3527 3528 mutex_lock(&ipvs->sync_mutex); 3529 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3530 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3531 &ipvs->mcfg, cb) < 0) 3532 goto nla_put_failure; 3533 3534 cb->args[0] = 1; 3535 } 3536 3537 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3538 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3539 &ipvs->bcfg, cb) < 0) 3540 goto nla_put_failure; 3541 3542 cb->args[1] = 1; 3543 } 3544 3545 
nla_put_failure: 3546 mutex_unlock(&ipvs->sync_mutex); 3547 3548 return skb->len; 3549 } 3550 3551 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3552 { 3553 struct ipvs_sync_daemon_cfg c; 3554 struct nlattr *a; 3555 int ret; 3556 3557 memset(&c, 0, sizeof(c)); 3558 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3559 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3560 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3561 return -EINVAL; 3562 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3563 sizeof(c.mcast_ifn)); 3564 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); 3565 3566 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; 3567 if (a) 3568 c.sync_maxlen = nla_get_u16(a); 3569 3570 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; 3571 if (a) { 3572 c.mcast_af = AF_INET; 3573 c.mcast_group.ip = nla_get_in_addr(a); 3574 if (!ipv4_is_multicast(c.mcast_group.ip)) 3575 return -EINVAL; 3576 } else { 3577 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; 3578 if (a) { 3579 #ifdef CONFIG_IP_VS_IPV6 3580 int addr_type; 3581 3582 c.mcast_af = AF_INET6; 3583 c.mcast_group.in6 = nla_get_in6_addr(a); 3584 addr_type = ipv6_addr_type(&c.mcast_group.in6); 3585 if (!(addr_type & IPV6_ADDR_MULTICAST)) 3586 return -EINVAL; 3587 #else 3588 return -EAFNOSUPPORT; 3589 #endif 3590 } 3591 } 3592 3593 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; 3594 if (a) 3595 c.mcast_port = nla_get_u16(a); 3596 3597 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; 3598 if (a) 3599 c.mcast_ttl = nla_get_u8(a); 3600 3601 /* The synchronization protocol is incompatible with mixed family 3602 * services 3603 */ 3604 if (ipvs->mixed_address_family_dests > 0) 3605 return -EINVAL; 3606 3607 ret = start_sync_thread(ipvs, &c, 3608 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3609 return ret; 3610 } 3611 3612 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3613 { 3614 int ret; 3615 3616 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3617 return -EINVAL; 3618 3619 ret = stop_sync_thread(ipvs, 3620 
nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3621 return ret; 3622 } 3623 3624 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) 3625 { 3626 struct ip_vs_timeout_user t; 3627 3628 __ip_vs_get_timeouts(ipvs, &t); 3629 3630 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3631 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3632 3633 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3634 t.tcp_fin_timeout = 3635 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3636 3637 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3638 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3639 3640 return ip_vs_set_timeout(ipvs, &t); 3641 } 3642 3643 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3644 { 3645 int ret = -EINVAL, cmd; 3646 struct net *net = sock_net(skb->sk); 3647 struct netns_ipvs *ipvs = net_ipvs(net); 3648 3649 cmd = info->genlhdr->cmd; 3650 3651 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3652 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3653 3654 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3655 nla_parse_nested_deprecated(daemon_attrs, IPVS_DAEMON_ATTR_MAX, info->attrs[IPVS_CMD_ATTR_DAEMON], ip_vs_daemon_policy, info->extack)) 3656 goto out; 3657 3658 if (cmd == IPVS_CMD_NEW_DAEMON) 3659 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); 3660 else 3661 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); 3662 } 3663 3664 out: 3665 return ret; 3666 } 3667 3668 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3669 { 3670 bool need_full_svc = false, need_full_dest = false; 3671 struct ip_vs_service *svc = NULL; 3672 struct ip_vs_service_user_kern usvc; 3673 struct ip_vs_dest_user_kern udest; 3674 int ret = 0, cmd; 3675 struct net *net = sock_net(skb->sk); 3676 struct netns_ipvs *ipvs = net_ipvs(net); 3677 3678 cmd = info->genlhdr->cmd; 3679 3680 mutex_lock(&__ip_vs_mutex); 3681 3682 if (cmd == IPVS_CMD_FLUSH) { 3683 ret = ip_vs_flush(ipvs, false); 3684 goto 
out; 3685 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3686 ret = ip_vs_genl_set_config(ipvs, info->attrs); 3687 goto out; 3688 } else if (cmd == IPVS_CMD_ZERO && 3689 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3690 ret = ip_vs_zero_all(ipvs); 3691 goto out; 3692 } 3693 3694 /* All following commands require a service argument, so check if we 3695 * received a valid one. We need a full service specification when 3696 * adding / editing a service. Only identifying members otherwise. */ 3697 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3698 need_full_svc = true; 3699 3700 ret = ip_vs_genl_parse_service(ipvs, &usvc, 3701 info->attrs[IPVS_CMD_ATTR_SERVICE], 3702 need_full_svc, &svc); 3703 if (ret) 3704 goto out; 3705 3706 /* Unless we're adding a new service, the service must already exist */ 3707 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3708 ret = -ESRCH; 3709 goto out; 3710 } 3711 3712 /* Destination commands require a valid destination argument. For 3713 * adding / editing a destination, we need a full destination 3714 * specification. */ 3715 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3716 cmd == IPVS_CMD_DEL_DEST) { 3717 if (cmd != IPVS_CMD_DEL_DEST) 3718 need_full_dest = true; 3719 3720 ret = ip_vs_genl_parse_dest(&udest, 3721 info->attrs[IPVS_CMD_ATTR_DEST], 3722 need_full_dest); 3723 if (ret) 3724 goto out; 3725 3726 /* Old protocols did not allow the user to specify address 3727 * family, so we set it to zero instead. We also didn't 3728 * allow heterogeneous pools in the old code, so it's safe 3729 * to assume that this will have the same address family as 3730 * the service. 
3731 */ 3732 if (udest.af == 0) 3733 udest.af = svc->af; 3734 3735 if (!ip_vs_is_af_valid(udest.af)) { 3736 ret = -EAFNOSUPPORT; 3737 goto out; 3738 } 3739 3740 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { 3741 /* The synchronization protocol is incompatible 3742 * with mixed family services 3743 */ 3744 if (ipvs->sync_state) { 3745 ret = -EINVAL; 3746 goto out; 3747 } 3748 3749 /* Which connection types do we support? */ 3750 switch (udest.conn_flags) { 3751 case IP_VS_CONN_F_TUNNEL: 3752 /* We are able to forward this */ 3753 break; 3754 default: 3755 ret = -EINVAL; 3756 goto out; 3757 } 3758 } 3759 } 3760 3761 switch (cmd) { 3762 case IPVS_CMD_NEW_SERVICE: 3763 if (svc == NULL) 3764 ret = ip_vs_add_service(ipvs, &usvc, &svc); 3765 else 3766 ret = -EEXIST; 3767 break; 3768 case IPVS_CMD_SET_SERVICE: 3769 ret = ip_vs_edit_service(svc, &usvc); 3770 break; 3771 case IPVS_CMD_DEL_SERVICE: 3772 ret = ip_vs_del_service(svc); 3773 /* do not use svc, it can be freed */ 3774 break; 3775 case IPVS_CMD_NEW_DEST: 3776 ret = ip_vs_add_dest(svc, &udest); 3777 break; 3778 case IPVS_CMD_SET_DEST: 3779 ret = ip_vs_edit_dest(svc, &udest); 3780 break; 3781 case IPVS_CMD_DEL_DEST: 3782 ret = ip_vs_del_dest(svc, &udest); 3783 break; 3784 case IPVS_CMD_ZERO: 3785 ret = ip_vs_zero_service(svc); 3786 break; 3787 default: 3788 ret = -EINVAL; 3789 } 3790 3791 out: 3792 mutex_unlock(&__ip_vs_mutex); 3793 3794 return ret; 3795 } 3796 3797 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3798 { 3799 struct sk_buff *msg; 3800 void *reply; 3801 int ret, cmd, reply_cmd; 3802 struct net *net = sock_net(skb->sk); 3803 struct netns_ipvs *ipvs = net_ipvs(net); 3804 3805 cmd = info->genlhdr->cmd; 3806 3807 if (cmd == IPVS_CMD_GET_SERVICE) 3808 reply_cmd = IPVS_CMD_NEW_SERVICE; 3809 else if (cmd == IPVS_CMD_GET_INFO) 3810 reply_cmd = IPVS_CMD_SET_INFO; 3811 else if (cmd == IPVS_CMD_GET_CONFIG) 3812 reply_cmd = IPVS_CMD_SET_CONFIG; 3813 else { 3814 pr_err("unknown 
Generic Netlink command\n"); 3815 return -EINVAL; 3816 } 3817 3818 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3819 if (!msg) 3820 return -ENOMEM; 3821 3822 mutex_lock(&__ip_vs_mutex); 3823 3824 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3825 if (reply == NULL) 3826 goto nla_put_failure; 3827 3828 switch (cmd) { 3829 case IPVS_CMD_GET_SERVICE: 3830 { 3831 struct ip_vs_service *svc; 3832 3833 svc = ip_vs_genl_find_service(ipvs, 3834 info->attrs[IPVS_CMD_ATTR_SERVICE]); 3835 if (IS_ERR(svc)) { 3836 ret = PTR_ERR(svc); 3837 goto out_err; 3838 } else if (svc) { 3839 ret = ip_vs_genl_fill_service(msg, svc); 3840 if (ret) 3841 goto nla_put_failure; 3842 } else { 3843 ret = -ESRCH; 3844 goto out_err; 3845 } 3846 3847 break; 3848 } 3849 3850 case IPVS_CMD_GET_CONFIG: 3851 { 3852 struct ip_vs_timeout_user t; 3853 3854 __ip_vs_get_timeouts(ipvs, &t); 3855 #ifdef CONFIG_IP_VS_PROTO_TCP 3856 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3857 t.tcp_timeout) || 3858 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3859 t.tcp_fin_timeout)) 3860 goto nla_put_failure; 3861 #endif 3862 #ifdef CONFIG_IP_VS_PROTO_UDP 3863 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) 3864 goto nla_put_failure; 3865 #endif 3866 3867 break; 3868 } 3869 3870 case IPVS_CMD_GET_INFO: 3871 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, 3872 IP_VS_VERSION_CODE) || 3873 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3874 ip_vs_conn_tab_size)) 3875 goto nla_put_failure; 3876 break; 3877 } 3878 3879 genlmsg_end(msg, reply); 3880 ret = genlmsg_reply(msg, info); 3881 goto out; 3882 3883 nla_put_failure: 3884 pr_err("not enough space in Netlink message\n"); 3885 ret = -EMSGSIZE; 3886 3887 out_err: 3888 nlmsg_free(msg); 3889 out: 3890 mutex_unlock(&__ip_vs_mutex); 3891 3892 return ret; 3893 } 3894 3895 3896 static const struct genl_small_ops ip_vs_genl_ops[] = { 3897 { 3898 .cmd = IPVS_CMD_NEW_SERVICE, 3899 .validate = GENL_DONT_VALIDATE_STRICT | 
GENL_DONT_VALIDATE_DUMP, 3900 .flags = GENL_ADMIN_PERM, 3901 .doit = ip_vs_genl_set_cmd, 3902 }, 3903 { 3904 .cmd = IPVS_CMD_SET_SERVICE, 3905 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3906 .flags = GENL_ADMIN_PERM, 3907 .doit = ip_vs_genl_set_cmd, 3908 }, 3909 { 3910 .cmd = IPVS_CMD_DEL_SERVICE, 3911 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3912 .flags = GENL_ADMIN_PERM, 3913 .doit = ip_vs_genl_set_cmd, 3914 }, 3915 { 3916 .cmd = IPVS_CMD_GET_SERVICE, 3917 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3918 .flags = GENL_ADMIN_PERM, 3919 .doit = ip_vs_genl_get_cmd, 3920 .dumpit = ip_vs_genl_dump_services, 3921 }, 3922 { 3923 .cmd = IPVS_CMD_NEW_DEST, 3924 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3925 .flags = GENL_ADMIN_PERM, 3926 .doit = ip_vs_genl_set_cmd, 3927 }, 3928 { 3929 .cmd = IPVS_CMD_SET_DEST, 3930 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3931 .flags = GENL_ADMIN_PERM, 3932 .doit = ip_vs_genl_set_cmd, 3933 }, 3934 { 3935 .cmd = IPVS_CMD_DEL_DEST, 3936 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3937 .flags = GENL_ADMIN_PERM, 3938 .doit = ip_vs_genl_set_cmd, 3939 }, 3940 { 3941 .cmd = IPVS_CMD_GET_DEST, 3942 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3943 .flags = GENL_ADMIN_PERM, 3944 .dumpit = ip_vs_genl_dump_dests, 3945 }, 3946 { 3947 .cmd = IPVS_CMD_NEW_DAEMON, 3948 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3949 .flags = GENL_ADMIN_PERM, 3950 .doit = ip_vs_genl_set_daemon, 3951 }, 3952 { 3953 .cmd = IPVS_CMD_DEL_DAEMON, 3954 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3955 .flags = GENL_ADMIN_PERM, 3956 .doit = ip_vs_genl_set_daemon, 3957 }, 3958 { 3959 .cmd = IPVS_CMD_GET_DAEMON, 3960 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3961 .flags = GENL_ADMIN_PERM, 3962 .dumpit = ip_vs_genl_dump_daemons, 3963 }, 3964 { 3965 .cmd = 
IPVS_CMD_SET_CONFIG, 3966 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3967 .flags = GENL_ADMIN_PERM, 3968 .doit = ip_vs_genl_set_cmd, 3969 }, 3970 { 3971 .cmd = IPVS_CMD_GET_CONFIG, 3972 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3973 .flags = GENL_ADMIN_PERM, 3974 .doit = ip_vs_genl_get_cmd, 3975 }, 3976 { 3977 .cmd = IPVS_CMD_GET_INFO, 3978 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3979 .flags = GENL_ADMIN_PERM, 3980 .doit = ip_vs_genl_get_cmd, 3981 }, 3982 { 3983 .cmd = IPVS_CMD_ZERO, 3984 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3985 .flags = GENL_ADMIN_PERM, 3986 .doit = ip_vs_genl_set_cmd, 3987 }, 3988 { 3989 .cmd = IPVS_CMD_FLUSH, 3990 .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, 3991 .flags = GENL_ADMIN_PERM, 3992 .doit = ip_vs_genl_set_cmd, 3993 }, 3994 }; 3995 3996 static struct genl_family ip_vs_genl_family __ro_after_init = { 3997 .hdrsize = 0, 3998 .name = IPVS_GENL_NAME, 3999 .version = IPVS_GENL_VERSION, 4000 .maxattr = IPVS_CMD_ATTR_MAX, 4001 .policy = ip_vs_cmd_policy, 4002 .netnsok = true, /* Make ipvsadm to work on netns */ 4003 .module = THIS_MODULE, 4004 .small_ops = ip_vs_genl_ops, 4005 .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), 4006 }; 4007 4008 static int __init ip_vs_genl_register(void) 4009 { 4010 return genl_register_family(&ip_vs_genl_family); 4011 } 4012 4013 static void ip_vs_genl_unregister(void) 4014 { 4015 genl_unregister_family(&ip_vs_genl_family); 4016 } 4017 4018 /* End of Generic Netlink interface definitions */ 4019 4020 /* 4021 * per netns intit/exit func. 
4022 */ 4023 #ifdef CONFIG_SYSCTL 4024 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) 4025 { 4026 struct net *net = ipvs->net; 4027 int idx; 4028 struct ctl_table *tbl; 4029 4030 atomic_set(&ipvs->dropentry, 0); 4031 spin_lock_init(&ipvs->dropentry_lock); 4032 spin_lock_init(&ipvs->droppacket_lock); 4033 spin_lock_init(&ipvs->securetcp_lock); 4034 4035 if (!net_eq(net, &init_net)) { 4036 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 4037 if (tbl == NULL) 4038 return -ENOMEM; 4039 4040 /* Don't export sysctls to unprivileged users */ 4041 if (net->user_ns != &init_user_ns) 4042 tbl[0].procname = NULL; 4043 } else 4044 tbl = vs_vars; 4045 /* Initialize sysctl defaults */ 4046 for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { 4047 if (tbl[idx].proc_handler == proc_do_defense_mode) 4048 tbl[idx].extra2 = ipvs; 4049 } 4050 idx = 0; 4051 ipvs->sysctl_amemthresh = 1024; 4052 tbl[idx++].data = &ipvs->sysctl_amemthresh; 4053 ipvs->sysctl_am_droprate = 10; 4054 tbl[idx++].data = &ipvs->sysctl_am_droprate; 4055 tbl[idx++].data = &ipvs->sysctl_drop_entry; 4056 tbl[idx++].data = &ipvs->sysctl_drop_packet; 4057 #ifdef CONFIG_IP_VS_NFCT 4058 tbl[idx++].data = &ipvs->sysctl_conntrack; 4059 #endif 4060 tbl[idx++].data = &ipvs->sysctl_secure_tcp; 4061 ipvs->sysctl_snat_reroute = 1; 4062 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 4063 ipvs->sysctl_sync_ver = 1; 4064 tbl[idx++].data = &ipvs->sysctl_sync_ver; 4065 ipvs->sysctl_sync_ports = 1; 4066 tbl[idx++].data = &ipvs->sysctl_sync_ports; 4067 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; 4068 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 4069 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 4070 ipvs->sysctl_sync_sock_size = 0; 4071 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 4072 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 4073 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 4074 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; 4075 tbl[idx++].data = 
&ipvs->sysctl_sloppy_sctp; 4076 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 4077 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 4078 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 4079 tbl[idx].data = &ipvs->sysctl_sync_threshold; 4080 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 4081 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 4082 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; 4083 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); 4084 tbl[idx++].data = &ipvs->sysctl_sync_retries; 4085 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 4086 ipvs->sysctl_pmtu_disc = 1; 4087 tbl[idx++].data = &ipvs->sysctl_pmtu_disc; 4088 tbl[idx++].data = &ipvs->sysctl_backup_only; 4089 ipvs->sysctl_conn_reuse_mode = 1; 4090 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; 4091 tbl[idx++].data = &ipvs->sysctl_schedule_icmp; 4092 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; 4093 #ifdef CONFIG_IP_VS_DEBUG 4094 /* Global sysctls must be ro in non-init netns */ 4095 if (!net_eq(net, &init_net)) 4096 tbl[idx++].mode = 0444; 4097 #endif 4098 4099 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 4100 if (ipvs->sysctl_hdr == NULL) { 4101 if (!net_eq(net, &init_net)) 4102 kfree(tbl); 4103 return -ENOMEM; 4104 } 4105 ip_vs_start_estimator(ipvs, &ipvs->tot_stats); 4106 ipvs->sysctl_tbl = tbl; 4107 /* Schedule defense work */ 4108 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 4109 queue_delayed_work(system_long_wq, &ipvs->defense_work, 4110 DEFENSE_TIMER_PERIOD); 4111 4112 /* Init delayed work for expiring no dest conn */ 4113 INIT_DELAYED_WORK(&ipvs->expire_nodest_conn_work, 4114 expire_nodest_conn_handler); 4115 4116 return 0; 4117 } 4118 4119 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) 4120 { 4121 struct net *net = ipvs->net; 4122 4123 cancel_delayed_work_sync(&ipvs->expire_nodest_conn_work); 4124 
cancel_delayed_work_sync(&ipvs->defense_work); 4125 cancel_work_sync(&ipvs->defense_work.work); 4126 unregister_net_sysctl_table(ipvs->sysctl_hdr); 4127 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); 4128 4129 if (!net_eq(net, &init_net)) 4130 kfree(ipvs->sysctl_tbl); 4131 } 4132 4133 #else 4134 4135 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } 4136 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } 4137 4138 #endif 4139 4140 static struct notifier_block ip_vs_dst_notifier = { 4141 .notifier_call = ip_vs_dst_event, 4142 #ifdef CONFIG_IP_VS_IPV6 4143 .priority = ADDRCONF_NOTIFY_PRIORITY + 5, 4144 #endif 4145 }; 4146 4147 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 4148 { 4149 int i, idx; 4150 4151 /* Initialize rs_table */ 4152 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 4153 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 4154 4155 INIT_LIST_HEAD(&ipvs->dest_trash); 4156 spin_lock_init(&ipvs->dest_trash_lock); 4157 timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); 4158 atomic_set(&ipvs->ftpsvc_counter, 0); 4159 atomic_set(&ipvs->nullsvc_counter, 0); 4160 atomic_set(&ipvs->conn_out_counter, 0); 4161 4162 /* procfs stats */ 4163 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4164 if (!ipvs->tot_stats.cpustats) 4165 return -ENOMEM; 4166 4167 for_each_possible_cpu(i) { 4168 struct ip_vs_cpu_stats *ipvs_tot_stats; 4169 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 4170 u64_stats_init(&ipvs_tot_stats->syncp); 4171 } 4172 4173 spin_lock_init(&ipvs->tot_stats.lock); 4174 4175 #ifdef CONFIG_PROC_FS 4176 if (!proc_create_net("ip_vs", 0, ipvs->net->proc_net, 4177 &ip_vs_info_seq_ops, sizeof(struct ip_vs_iter))) 4178 goto err_vs; 4179 if (!proc_create_net_single("ip_vs_stats", 0, ipvs->net->proc_net, 4180 ip_vs_stats_show, NULL)) 4181 goto err_stats; 4182 if (!proc_create_net_single("ip_vs_stats_percpu", 0, 4183 ipvs->net->proc_net, 4184 
ip_vs_stats_percpu_show, NULL)) 4185 goto err_percpu; 4186 #endif 4187 4188 if (ip_vs_control_net_init_sysctl(ipvs)) 4189 goto err; 4190 4191 return 0; 4192 4193 err: 4194 #ifdef CONFIG_PROC_FS 4195 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4196 4197 err_percpu: 4198 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4199 4200 err_stats: 4201 remove_proc_entry("ip_vs", ipvs->net->proc_net); 4202 4203 err_vs: 4204 #endif 4205 free_percpu(ipvs->tot_stats.cpustats); 4206 return -ENOMEM; 4207 } 4208 4209 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 4210 { 4211 ip_vs_trash_cleanup(ipvs); 4212 ip_vs_control_net_cleanup_sysctl(ipvs); 4213 #ifdef CONFIG_PROC_FS 4214 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4215 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4216 remove_proc_entry("ip_vs", ipvs->net->proc_net); 4217 #endif 4218 free_percpu(ipvs->tot_stats.cpustats); 4219 } 4220 4221 int __init ip_vs_register_nl_ioctl(void) 4222 { 4223 int ret; 4224 4225 ret = nf_register_sockopt(&ip_vs_sockopts); 4226 if (ret) { 4227 pr_err("cannot register sockopt.\n"); 4228 goto err_sock; 4229 } 4230 4231 ret = ip_vs_genl_register(); 4232 if (ret) { 4233 pr_err("cannot register Generic Netlink interface.\n"); 4234 goto err_genl; 4235 } 4236 return 0; 4237 4238 err_genl: 4239 nf_unregister_sockopt(&ip_vs_sockopts); 4240 err_sock: 4241 return ret; 4242 } 4243 4244 void ip_vs_unregister_nl_ioctl(void) 4245 { 4246 ip_vs_genl_unregister(); 4247 nf_unregister_sockopt(&ip_vs_sockopts); 4248 } 4249 4250 int __init ip_vs_control_init(void) 4251 { 4252 int idx; 4253 int ret; 4254 4255 EnterFunction(2); 4256 4257 /* Initialize svc_table, ip_vs_svc_fwm_table */ 4258 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 4259 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 4260 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 4261 } 4262 4263 smp_wmb(); /* Do we really need it now ? 
*/ 4264 4265 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 4266 if (ret < 0) 4267 return ret; 4268 4269 LeaveFunction(2); 4270 return 0; 4271 } 4272 4273 4274 void ip_vs_control_cleanup(void) 4275 { 4276 EnterFunction(2); 4277 unregister_netdevice_notifier(&ip_vs_dst_notifier); 4278 LeaveFunction(2); 4279 } 4280
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.