1 /* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 9 * Peter Kese <peter.kese@ijs.si> 10 * Julian Anastasov <ja@ssi.bg> 11 * 12 * This program is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU General Public License 14 * as published by the Free Software Foundation; either version 15 * 2 of the License, or (at your option) any later version. 16 * 17 * Changes: 18 * 19 */ 20 21 #define KMSG_COMPONENT "IPVS" 22 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 23 24 #include <linux/module.h> 25 #include <linux/init.h> 26 #include <linux/types.h> 27 #include <linux/capability.h> 28 #include <linux/fs.h> 29 #include <linux/sysctl.h> 30 #include <linux/proc_fs.h> 31 #include <linux/workqueue.h> 32 #include <linux/swap.h> 33 #include <linux/seq_file.h> 34 #include <linux/slab.h> 35 36 #include <linux/netfilter.h> 37 #include <linux/netfilter_ipv4.h> 38 #include <linux/mutex.h> 39 40 #include <net/net_namespace.h> 41 #include <linux/nsproxy.h> 42 #include <net/ip.h> 43 #ifdef CONFIG_IP_VS_IPV6 44 #include <net/ipv6.h> 45 #include <net/ip6_route.h> 46 #endif 47 #include <net/route.h> 48 #include <net/sock.h> 49 #include <net/genetlink.h> 50 51 #include <asm/uaccess.h> 52 53 #include <net/ip_vs.h> 54 55 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */ 56 static DEFINE_MUTEX(__ip_vs_mutex); 57 58 /* sysctl variables */ 59 60 #ifdef CONFIG_IP_VS_DEBUG 61 static int sysctl_ip_vs_debug_level = 0; 62 63 int ip_vs_get_debug_level(void) 64 { 65 return sysctl_ip_vs_debug_level; 66 } 67 #endif 68 69 70 /* Protos */ 71 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup); 72 73 74 #ifdef CONFIG_IP_VS_IPV6 75 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ 76 static bool __ip_vs_addr_is_local_v6(struct net *net, 77 const struct in6_addr *addr) 78 { 79 struct flowi6 fl6 = { 80 .daddr = *addr, 81 }; 82 struct dst_entry *dst = ip6_route_output(net, NULL, &fl6); 83 bool is_local; 84 85 is_local = !dst->error && dst->dev && (dst->dev->flags & IFF_LOOPBACK); 86 87 dst_release(dst); 88 return is_local; 89 } 90 #endif 91 92 #ifdef CONFIG_SYSCTL 93 /* 94 * update_defense_level is called from keventd and from sysctl, 95 * so it needs to protect itself from softirqs 96 */ 97 static void update_defense_level(struct netns_ipvs *ipvs) 98 { 99 struct sysinfo i; 100 int availmem; 101 int nomem; 102 int to_change = -1; 103 104 /* we only count free and buffered memory (in pages) */ 105 si_meminfo(&i); 106 availmem = i.freeram + i.bufferram; 107 /* however in linux 2.5 the i.bufferram is total page cache size, 108 we need adjust it */ 109 /* si_swapinfo(&i); */ 110 /* availmem = availmem - (i.totalswap - i.freeswap); */ 111 112 nomem = (availmem < ipvs->sysctl_amemthresh); 113 114 local_bh_disable(); 115 116 /* drop_entry */ 117 spin_lock(&ipvs->dropentry_lock); 118 switch (ipvs->sysctl_drop_entry) { 119 case 0: 120 atomic_set(&ipvs->dropentry, 0); 121 break; 122 case 1: 123 if (nomem) { 124 atomic_set(&ipvs->dropentry, 1); 125 ipvs->sysctl_drop_entry = 2; 126 } else { 127 atomic_set(&ipvs->dropentry, 0); 128 } 129 break; 130 case 2: 131 if (nomem) { 132 atomic_set(&ipvs->dropentry, 1); 133 } else { 134 atomic_set(&ipvs->dropentry, 0); 135 ipvs->sysctl_drop_entry = 1; 136 }; 137 break; 138 case 3: 139 atomic_set(&ipvs->dropentry, 1); 140 break; 141 } 142 spin_unlock(&ipvs->dropentry_lock); 143 144 /* drop_packet */ 145 spin_lock(&ipvs->droppacket_lock); 146 switch (ipvs->sysctl_drop_packet) { 147 case 0: 148 ipvs->drop_rate = 0; 149 break; 150 case 1: 151 if (nomem) { 152 ipvs->drop_rate = ipvs->drop_counter 153 = ipvs->sysctl_amemthresh / 154 (ipvs->sysctl_amemthresh-availmem); 155 ipvs->sysctl_drop_packet = 2; 156 } else { 157 ipvs->drop_rate = 0; 158 } 159 break; 160 case 2: 161 if (nomem) { 162 ipvs->drop_rate = ipvs->drop_counter 163 = ipvs->sysctl_amemthresh / 164 (ipvs->sysctl_amemthresh-availmem); 165 } else { 166 ipvs->drop_rate = 0; 167 ipvs->sysctl_drop_packet = 1; 168 } 169 break; 170 case 3: 171 ipvs->drop_rate = ipvs->sysctl_am_droprate; 172 break; 173 } 174 spin_unlock(&ipvs->droppacket_lock); 175 176 /* secure_tcp */ 177 spin_lock(&ipvs->securetcp_lock); 178 switch (ipvs->sysctl_secure_tcp) { 179 case 0: 180 if (ipvs->old_secure_tcp >= 2) 181 to_change = 0; 182 break; 183 case 1: 184 if (nomem) { 185 if (ipvs->old_secure_tcp < 2) 186 to_change = 1; 187 ipvs->sysctl_secure_tcp = 2; 188 } else { 189 if (ipvs->old_secure_tcp >= 2) 190 to_change = 0; 191 } 192 break; 193 case 2: 194 if (nomem) { 195 if (ipvs->old_secure_tcp < 2) 196 to_change = 1; 197 } else { 198 if (ipvs->old_secure_tcp >= 2) 199 to_change = 0; 200 ipvs->sysctl_secure_tcp = 1; 201 } 202 break; 203 case 3: 204 if (ipvs->old_secure_tcp < 2) 205 to_change = 1; 206 break; 207 } 208 ipvs->old_secure_tcp = ipvs->sysctl_secure_tcp; 209 if (to_change >= 0) 210 ip_vs_protocol_timeout_change(ipvs, 211 ipvs->sysctl_secure_tcp > 1); 212 spin_unlock(&ipvs->securetcp_lock); 213 214 local_bh_enable(); 215 } 216 217 218 /* 219 * Timer for checking the defense 220 */ 221 #define DEFENSE_TIMER_PERIOD 1*HZ 222 223 static void defense_work_handler(struct work_struct *work) 224 { 225 struct netns_ipvs *ipvs = 226 container_of(work, struct netns_ipvs, defense_work.work); 227 228 update_defense_level(ipvs); 229 if (atomic_read(&ipvs->dropentry)) 230 ip_vs_random_dropentry(ipvs); 231 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 232 } 233 #endif 234 235 int 236 ip_vs_use_count_inc(void) 237 { 238 return try_module_get(THIS_MODULE); 239 } 240 241 void 242 ip_vs_use_count_dec(void) 243 { 244 module_put(THIS_MODULE); 245 } 246 247 248 /* 249 * Hash table: for virtual service lookups 250 */ 251 #define IP_VS_SVC_TAB_BITS 8 252 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) 253 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1) 254 255 /* the service table hashed by <protocol, addr, port> */ 256 static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; 257 /* the service table hashed by fwmark */ 258 static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; 259 260 261 /* 262 * Returns hash value for virtual service 263 */ 264 static inline unsigned int 265 ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, 266 const union nf_inet_addr *addr, __be16 port) 267 { 268 register unsigned int porth = ntohs(port); 269 __be32 addr_fold = addr->ip; 270 __u32 ahash; 271 272 #ifdef CONFIG_IP_VS_IPV6 273 if (af == AF_INET6) 274 addr_fold = addr->ip6[0]^addr->ip6[1]^ 275 addr->ip6[2]^addr->ip6[3]; 276 #endif 277 ahash = ntohl(addr_fold); 278 ahash ^= ((size_t) ipvs >> 8); 279 280 return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & 281 IP_VS_SVC_TAB_MASK; 282 } 283 284 /* 285 * Returns hash value of fwmark for virtual service lookup 286 */ 287 static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) 288 { 289 return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; 290 } 291 292 /* 293 * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> 294 * or in the ip_vs_svc_fwm_table by fwmark. 295 * Should be called with locked tables. 296 */ 297 static int ip_vs_svc_hash(struct ip_vs_service *svc) 298 { 299 unsigned int hash; 300 301 if (svc->flags & IP_VS_SVC_F_HASHED) { 302 pr_err("%s(): request for already hashed, called from %pF\n", 303 __func__, __builtin_return_address(0)); 304 return 0; 305 } 306 307 if (svc->fwmark == 0) { 308 /* 309 * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table 310 */ 311 hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, 312 &svc->addr, svc->port); 313 hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); 314 } else { 315 /* 316 * Hash it by fwmark in svc_fwm_table 317 */ 318 hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); 319 hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); 320 } 321 322 svc->flags |= IP_VS_SVC_F_HASHED; 323 /* increase its refcnt because it is referenced by the svc table */ 324 atomic_inc(&svc->refcnt); 325 return 1; 326 } 327 328 329 /* 330 * Unhashes a service from svc_table / svc_fwm_table. 331 * Should be called with locked tables. 332 */ 333 static int ip_vs_svc_unhash(struct ip_vs_service *svc) 334 { 335 if (!(svc->flags & IP_VS_SVC_F_HASHED)) { 336 pr_err("%s(): request for unhash flagged, called from %pF\n", 337 __func__, __builtin_return_address(0)); 338 return 0; 339 } 340 341 if (svc->fwmark == 0) { 342 /* Remove it from the svc_table table */ 343 hlist_del_rcu(&svc->s_list); 344 } else { 345 /* Remove it from the svc_fwm_table table */ 346 hlist_del_rcu(&svc->f_list); 347 } 348 349 svc->flags &= ~IP_VS_SVC_F_HASHED; 350 atomic_dec(&svc->refcnt); 351 return 1; 352 } 353 354 355 /* 356 * Get service by {netns, proto,addr,port} in the service table. 357 */ 358 static inline struct ip_vs_service * 359 __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, 360 const union nf_inet_addr *vaddr, __be16 vport) 361 { 362 unsigned int hash; 363 struct ip_vs_service *svc; 364 365 /* Check for "full" addressed entries */ 366 hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); 367 368 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { 369 if ((svc->af == af) 370 && ip_vs_addr_equal(af, &svc->addr, vaddr) 371 && (svc->port == vport) 372 && (svc->protocol == protocol) 373 && (svc->ipvs == ipvs)) { 374 /* HIT */ 375 return svc; 376 } 377 } 378 379 return NULL; 380 } 381 382 383 /* 384 * Get service by {fwmark} in the service table. 385 */ 386 static inline struct ip_vs_service * 387 __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) 388 { 389 unsigned int hash; 390 struct ip_vs_service *svc; 391 392 /* Check for fwmark addressed entries */ 393 hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); 394 395 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { 396 if (svc->fwmark == fwmark && svc->af == af 397 && (svc->ipvs == ipvs)) { 398 /* HIT */ 399 return svc; 400 } 401 } 402 403 return NULL; 404 } 405 406 /* Find service, called under RCU lock */ 407 struct ip_vs_service * 408 ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, 409 const union nf_inet_addr *vaddr, __be16 vport) 410 { 411 struct ip_vs_service *svc; 412 413 /* 414 * Check the table hashed by fwmark first 415 */ 416 if (fwmark) { 417 svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); 418 if (svc) 419 goto out; 420 } 421 422 /* 423 * Check the table hashed by <protocol,addr,port> 424 * for "full" addressed entries 425 */ 426 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); 427 428 if (svc == NULL 429 && protocol == IPPROTO_TCP 430 && atomic_read(&ipvs->ftpsvc_counter) 431 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) { 432 /* 433 * Check if ftp service entry exists, the packet 434 * might belong to FTP data connections. 435 */ 436 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); 437 } 438 439 if (svc == NULL 440 && atomic_read(&ipvs->nullsvc_counter)) { 441 /* 442 * Check if the catch-all port (port zero) exists 443 */ 444 svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, 0); 445 } 446 447 out: 448 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", 449 fwmark, ip_vs_proto_name(protocol), 450 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), 451 svc ? "hit" : "not hit"); 452 453 return svc; 454 } 455 456 457 static inline void 458 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) 459 { 460 atomic_inc(&svc->refcnt); 461 rcu_assign_pointer(dest->svc, svc); 462 } 463 464 static void ip_vs_service_free(struct ip_vs_service *svc) 465 { 466 free_percpu(svc->stats.cpustats); 467 kfree(svc); 468 } 469 470 static void ip_vs_service_rcu_free(struct rcu_head *head) 471 { 472 struct ip_vs_service *svc; 473 474 svc = container_of(head, struct ip_vs_service, rcu_head); 475 ip_vs_service_free(svc); 476 } 477 478 static void __ip_vs_svc_put(struct ip_vs_service *svc, bool do_delay) 479 { 480 if (atomic_dec_and_test(&svc->refcnt)) { 481 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u\n", 482 svc->fwmark, 483 IP_VS_DBG_ADDR(svc->af, &svc->addr), 484 ntohs(svc->port)); 485 if (do_delay) 486 call_rcu(&svc->rcu_head, ip_vs_service_rcu_free); 487 else 488 ip_vs_service_free(svc); 489 } 490 } 491 492 493 /* 494 * Returns hash value for real service 495 */ 496 static inline unsigned int ip_vs_rs_hashkey(int af, 497 const union nf_inet_addr *addr, 498 __be16 port) 499 { 500 register unsigned int porth = ntohs(port); 501 __be32 addr_fold = addr->ip; 502 503 #ifdef CONFIG_IP_VS_IPV6 504 if (af == AF_INET6) 505 addr_fold = addr->ip6[0]^addr->ip6[1]^ 506 addr->ip6[2]^addr->ip6[3]; 507 #endif 508 509 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) 510 & IP_VS_RTAB_MASK; 511 } 512 513 /* Hash ip_vs_dest in rs_table by <proto,addr,port>. */ 514 static void ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest) 515 { 516 unsigned int hash; 517 518 if (dest->in_rs_table) 519 return; 520 521 /* 522 * Hash by proto,addr,port, 523 * which are the parameters of the real service. 524 */ 525 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); 526 527 hlist_add_head_rcu(&dest->d_list, &ipvs->rs_table[hash]); 528 dest->in_rs_table = 1; 529 } 530 531 /* Unhash ip_vs_dest from rs_table. */ 532 static void ip_vs_rs_unhash(struct ip_vs_dest *dest) 533 { 534 /* 535 * Remove it from the rs_table table. 536 */ 537 if (dest->in_rs_table) { 538 hlist_del_rcu(&dest->d_list); 539 dest->in_rs_table = 0; 540 } 541 } 542 543 /* Check if real service by <proto,addr,port> is present */ 544 bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, 545 const union nf_inet_addr *daddr, __be16 dport) 546 { 547 unsigned int hash; 548 struct ip_vs_dest *dest; 549 550 /* Check for "full" addressed entries */ 551 hash = ip_vs_rs_hashkey(af, daddr, dport); 552 553 rcu_read_lock(); 554 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 555 if (dest->port == dport && 556 dest->af == af && 557 ip_vs_addr_equal(af, &dest->addr, daddr) && 558 (dest->protocol == protocol || dest->vfwmark)) { 559 /* HIT */ 560 rcu_read_unlock(); 561 return true; 562 } 563 } 564 rcu_read_unlock(); 565 566 return false; 567 } 568 569 /* Find real service record by <proto,addr,port>. 570 * In case of multiple records with the same <proto,addr,port>, only 571 * the first found record is returned. 572 * 573 * To be called under RCU lock. 574 */ 575 struct ip_vs_dest *ip_vs_find_real_service(struct netns_ipvs *ipvs, int af, 576 __u16 protocol, 577 const union nf_inet_addr *daddr, 578 __be16 dport) 579 { 580 unsigned int hash; 581 struct ip_vs_dest *dest; 582 583 /* Check for "full" addressed entries */ 584 hash = ip_vs_rs_hashkey(af, daddr, dport); 585 586 hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { 587 if (dest->port == dport && 588 dest->af == af && 589 ip_vs_addr_equal(af, &dest->addr, daddr) && 590 (dest->protocol == protocol || dest->vfwmark)) { 591 /* HIT */ 592 return dest; 593 } 594 } 595 596 return NULL; 597 } 598 599 /* Lookup destination by {addr,port} in the given service 600 * Called under RCU lock. 601 */ 602 static struct ip_vs_dest * 603 ip_vs_lookup_dest(struct ip_vs_service *svc, int dest_af, 604 const union nf_inet_addr *daddr, __be16 dport) 605 { 606 struct ip_vs_dest *dest; 607 608 /* 609 * Find the destination for the given service 610 */ 611 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 612 if ((dest->af == dest_af) && 613 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 614 (dest->port == dport)) { 615 /* HIT */ 616 return dest; 617 } 618 } 619 620 return NULL; 621 } 622 623 /* 624 * Find destination by {daddr,dport,vaddr,protocol} 625 * Created to be used in ip_vs_process_message() in 626 * the backup synchronization daemon. It finds the 627 * destination to be bound to the received connection 628 * on the backup. 629 * Called under RCU lock, no refcnt is returned. 630 */ 631 struct ip_vs_dest *ip_vs_find_dest(struct netns_ipvs *ipvs, int svc_af, int dest_af, 632 const union nf_inet_addr *daddr, 633 __be16 dport, 634 const union nf_inet_addr *vaddr, 635 __be16 vport, __u16 protocol, __u32 fwmark, 636 __u32 flags) 637 { 638 struct ip_vs_dest *dest; 639 struct ip_vs_service *svc; 640 __be16 port = dport; 641 642 svc = ip_vs_service_find(ipvs, svc_af, fwmark, protocol, vaddr, vport); 643 if (!svc) 644 return NULL; 645 if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) 646 port = 0; 647 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port); 648 if (!dest) 649 dest = ip_vs_lookup_dest(svc, dest_af, daddr, port ^ dport); 650 return dest; 651 } 652 653 void ip_vs_dest_dst_rcu_free(struct rcu_head *head) 654 { 655 struct ip_vs_dest_dst *dest_dst = container_of(head, 656 struct ip_vs_dest_dst, 657 rcu_head); 658 659 dst_release(dest_dst->dst_cache); 660 kfree(dest_dst); 661 } 662 663 /* Release dest_dst and dst_cache for dest in user context */ 664 static void __ip_vs_dst_cache_reset(struct ip_vs_dest *dest) 665 { 666 struct ip_vs_dest_dst *old; 667 668 old = rcu_dereference_protected(dest->dest_dst, 1); 669 if (old) { 670 RCU_INIT_POINTER(dest->dest_dst, NULL); 671 call_rcu(&old->rcu_head, ip_vs_dest_dst_rcu_free); 672 } 673 } 674 675 /* 676 * Lookup dest by {svc,addr,port} in the destination trash. 677 * The destination trash is used to hold the destinations that are removed 678 * from the service table but are still referenced by some conn entries. 679 * The reason to add the destination trash is when the dest is temporary 680 * down (either by administrator or by monitor program), the dest can be 681 * picked back from the trash, the remaining connections to the dest can 682 * continue, and the counting information of the dest is also useful for 683 * scheduling. 684 */ 685 static struct ip_vs_dest * 686 ip_vs_trash_get_dest(struct ip_vs_service *svc, int dest_af, 687 const union nf_inet_addr *daddr, __be16 dport) 688 { 689 struct ip_vs_dest *dest; 690 struct netns_ipvs *ipvs = svc->ipvs; 691 692 /* 693 * Find the destination in trash 694 */ 695 spin_lock_bh(&ipvs->dest_trash_lock); 696 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 697 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " 698 "dest->refcnt=%d\n", 699 dest->vfwmark, 700 IP_VS_DBG_ADDR(dest->af, &dest->addr), 701 ntohs(dest->port), 702 atomic_read(&dest->refcnt)); 703 if (dest->af == dest_af && 704 ip_vs_addr_equal(dest_af, &dest->addr, daddr) && 705 dest->port == dport && 706 dest->vfwmark == svc->fwmark && 707 dest->protocol == svc->protocol && 708 (svc->fwmark || 709 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && 710 dest->vport == svc->port))) { 711 /* HIT */ 712 list_del(&dest->t_list); 713 ip_vs_dest_hold(dest); 714 goto out; 715 } 716 } 717 718 dest = NULL; 719 720 out: 721 spin_unlock_bh(&ipvs->dest_trash_lock); 722 723 return dest; 724 } 725 726 static void ip_vs_dest_free(struct ip_vs_dest *dest) 727 { 728 struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); 729 730 __ip_vs_dst_cache_reset(dest); 731 __ip_vs_svc_put(svc, false); 732 free_percpu(dest->stats.cpustats); 733 ip_vs_dest_put_and_free(dest); 734 } 735 736 /* 737 * Clean up all the destinations in the trash 738 * Called by the ip_vs_control_cleanup() 739 * 740 * When the ip_vs_control_clearup is activated by ipvs module exit, 741 * the service tables must have been flushed and all the connections 742 * are expired, and the refcnt of each destination in the trash must 743 * be 0, so we simply release them here. 744 */ 745 static void ip_vs_trash_cleanup(struct netns_ipvs *ipvs) 746 { 747 struct ip_vs_dest *dest, *nxt; 748 749 del_timer_sync(&ipvs->dest_trash_timer); 750 /* No need to use dest_trash_lock */ 751 list_for_each_entry_safe(dest, nxt, &ipvs->dest_trash, t_list) { 752 list_del(&dest->t_list); 753 ip_vs_dest_free(dest); 754 } 755 } 756 757 static void 758 ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src) 759 { 760 #define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c 761 762 spin_lock_bh(&src->lock); 763 764 IP_VS_SHOW_STATS_COUNTER(conns); 765 IP_VS_SHOW_STATS_COUNTER(inpkts); 766 IP_VS_SHOW_STATS_COUNTER(outpkts); 767 IP_VS_SHOW_STATS_COUNTER(inbytes); 768 IP_VS_SHOW_STATS_COUNTER(outbytes); 769 770 ip_vs_read_estimator(dst, src); 771 772 spin_unlock_bh(&src->lock); 773 } 774 775 static void 776 ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src) 777 { 778 dst->conns = (u32)src->conns; 779 dst->inpkts = (u32)src->inpkts; 780 dst->outpkts = (u32)src->outpkts; 781 dst->inbytes = src->inbytes; 782 dst->outbytes = src->outbytes; 783 dst->cps = (u32)src->cps; 784 dst->inpps = (u32)src->inpps; 785 dst->outpps = (u32)src->outpps; 786 dst->inbps = (u32)src->inbps; 787 dst->outbps = (u32)src->outbps; 788 } 789 790 static void 791 ip_vs_zero_stats(struct ip_vs_stats *stats) 792 { 793 spin_lock_bh(&stats->lock); 794 795 /* get current counters as zero point, rates are zeroed */ 796 797 #define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c 798 799 IP_VS_ZERO_STATS_COUNTER(conns); 800 IP_VS_ZERO_STATS_COUNTER(inpkts); 801 IP_VS_ZERO_STATS_COUNTER(outpkts); 802 IP_VS_ZERO_STATS_COUNTER(inbytes); 803 IP_VS_ZERO_STATS_COUNTER(outbytes); 804 805 ip_vs_zero_estimator(stats); 806 807 spin_unlock_bh(&stats->lock); 808 } 809 810 /* 811 * Update a destination in the given service 812 */ 813 static void 814 __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, 815 struct ip_vs_dest_user_kern *udest, int add) 816 { 817 struct netns_ipvs *ipvs = svc->ipvs; 818 struct ip_vs_service *old_svc; 819 struct ip_vs_scheduler *sched; 820 int conn_flags; 821 822 /* We cannot modify an address and change the address family */ 823 BUG_ON(!add && udest->af != dest->af); 824 825 if (add && udest->af != svc->af) 826 ipvs->mixed_address_family_dests++; 827 828 /* set the weight and the flags */ 829 atomic_set(&dest->weight, udest->weight); 830 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; 831 conn_flags |= IP_VS_CONN_F_INACTIVE; 832 833 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ 834 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { 835 conn_flags |= IP_VS_CONN_F_NOOUTPUT; 836 } else { 837 /* 838 * Put the real service in rs_table if not present. 839 * For now only for NAT! 840 */ 841 ip_vs_rs_hash(ipvs, dest); 842 } 843 atomic_set(&dest->conn_flags, conn_flags); 844 845 /* bind the service */ 846 old_svc = rcu_dereference_protected(dest->svc, 1); 847 if (!old_svc) { 848 __ip_vs_bind_svc(dest, svc); 849 } else { 850 if (old_svc != svc) { 851 ip_vs_zero_stats(&dest->stats); 852 __ip_vs_bind_svc(dest, svc); 853 __ip_vs_svc_put(old_svc, true); 854 } 855 } 856 857 /* set the dest status flags */ 858 dest->flags |= IP_VS_DEST_F_AVAILABLE; 859 860 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold) 861 dest->flags &= ~IP_VS_DEST_F_OVERLOAD; 862 dest->u_threshold = udest->u_threshold; 863 dest->l_threshold = udest->l_threshold; 864 865 dest->af = udest->af; 866 867 spin_lock_bh(&dest->dst_lock); 868 __ip_vs_dst_cache_reset(dest); 869 spin_unlock_bh(&dest->dst_lock); 870 871 if (add) { 872 ip_vs_start_estimator(svc->ipvs, &dest->stats); 873 list_add_rcu(&dest->n_list, &svc->destinations); 874 svc->num_dests++; 875 sched = rcu_dereference_protected(svc->scheduler, 1); 876 if (sched && sched->add_dest) 877 sched->add_dest(svc, dest); 878 } else { 879 sched = rcu_dereference_protected(svc->scheduler, 1); 880 if (sched && sched->upd_dest) 881 sched->upd_dest(svc, dest); 882 } 883 } 884 885 886 /* 887 * Create a destination for the given service 888 */ 889 static int 890 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, 891 struct ip_vs_dest **dest_p) 892 { 893 struct ip_vs_dest *dest; 894 unsigned int atype, i; 895 896 EnterFunction(2); 897 898 #ifdef CONFIG_IP_VS_IPV6 899 if (udest->af == AF_INET6) { 900 atype = ipv6_addr_type(&udest->addr.in6); 901 if ((!(atype & IPV6_ADDR_UNICAST) || 902 atype & IPV6_ADDR_LINKLOCAL) && 903 !__ip_vs_addr_is_local_v6(svc->ipvs->net, &udest->addr.in6)) 904 return -EINVAL; 905 } else 906 #endif 907 { 908 atype = inet_addr_type(svc->ipvs->net, udest->addr.ip); 909 if (atype != RTN_LOCAL && atype != RTN_UNICAST) 910 return -EINVAL; 911 } 912 913 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL); 914 if (dest == NULL) 915 return -ENOMEM; 916 917 dest->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 918 if (!dest->stats.cpustats) 919 goto err_alloc; 920 921 for_each_possible_cpu(i) { 922 struct ip_vs_cpu_stats *ip_vs_dest_stats; 923 ip_vs_dest_stats = per_cpu_ptr(dest->stats.cpustats, i); 924 u64_stats_init(&ip_vs_dest_stats->syncp); 925 } 926 927 dest->af = udest->af; 928 dest->protocol = svc->protocol; 929 dest->vaddr = svc->addr; 930 dest->vport = svc->port; 931 dest->vfwmark = svc->fwmark; 932 ip_vs_addr_copy(udest->af, &dest->addr, &udest->addr); 933 dest->port = udest->port; 934 935 atomic_set(&dest->activeconns, 0); 936 atomic_set(&dest->inactconns, 0); 937 atomic_set(&dest->persistconns, 0); 938 atomic_set(&dest->refcnt, 1); 939 940 INIT_HLIST_NODE(&dest->d_list); 941 spin_lock_init(&dest->dst_lock); 942 spin_lock_init(&dest->stats.lock); 943 __ip_vs_update_dest(svc, dest, udest, 1); 944 945 *dest_p = dest; 946 947 LeaveFunction(2); 948 return 0; 949 950 err_alloc: 951 kfree(dest); 952 return -ENOMEM; 953 } 954 955 956 /* 957 * Add a destination into an existing service 958 */ 959 static int 960 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 961 { 962 struct ip_vs_dest *dest; 963 union nf_inet_addr daddr; 964 __be16 dport = udest->port; 965 int ret; 966 967 EnterFunction(2); 968 969 if (udest->weight < 0) { 970 pr_err("%s(): server weight less than zero\n", __func__); 971 return -ERANGE; 972 } 973 974 if (udest->l_threshold > udest->u_threshold) { 975 pr_err("%s(): lower threshold is higher than upper threshold\n", 976 __func__); 977 return -ERANGE; 978 } 979 980 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 981 982 /* We use function that requires RCU lock */ 983 rcu_read_lock(); 984 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 985 rcu_read_unlock(); 986 987 if (dest != NULL) { 988 IP_VS_DBG(1, "%s(): dest already exists\n", __func__); 989 return -EEXIST; 990 } 991 992 /* 993 * Check if the dest already exists in the trash and 994 * is from the same service 995 */ 996 dest = ip_vs_trash_get_dest(svc, udest->af, &daddr, dport); 997 998 if (dest != NULL) { 999 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " 1000 "dest->refcnt=%d, service %u/%s:%u\n", 1001 IP_VS_DBG_ADDR(udest->af, &daddr), ntohs(dport), 1002 atomic_read(&dest->refcnt), 1003 dest->vfwmark, 1004 IP_VS_DBG_ADDR(svc->af, &dest->vaddr), 1005 ntohs(dest->vport)); 1006 1007 __ip_vs_update_dest(svc, dest, udest, 1); 1008 ret = 0; 1009 } else { 1010 /* 1011 * Allocate and initialize the dest structure 1012 */ 1013 ret = ip_vs_new_dest(svc, udest, &dest); 1014 } 1015 LeaveFunction(2); 1016 1017 return ret; 1018 } 1019 1020 1021 /* 1022 * Edit a destination in the given service 1023 */ 1024 static int 1025 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1026 { 1027 struct ip_vs_dest *dest; 1028 union nf_inet_addr daddr; 1029 __be16 dport = udest->port; 1030 1031 EnterFunction(2); 1032 1033 if (udest->weight < 0) { 1034 pr_err("%s(): server weight less than zero\n", __func__); 1035 return -ERANGE; 1036 } 1037 1038 if (udest->l_threshold > udest->u_threshold) { 1039 pr_err("%s(): lower threshold is higher than upper threshold\n", 1040 __func__); 1041 return -ERANGE; 1042 } 1043 1044 ip_vs_addr_copy(udest->af, &daddr, &udest->addr); 1045 1046 /* We use function that requires RCU lock */ 1047 rcu_read_lock(); 1048 dest = ip_vs_lookup_dest(svc, udest->af, &daddr, dport); 1049 rcu_read_unlock(); 1050 1051 if (dest == NULL) { 1052 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__); 1053 return -ENOENT; 1054 } 1055 1056 __ip_vs_update_dest(svc, dest, udest, 0); 1057 LeaveFunction(2); 1058 1059 return 0; 1060 } 1061 1062 /* 1063 * Delete a destination (must be already unlinked from the service) 1064 */ 1065 static void __ip_vs_del_dest(struct netns_ipvs *ipvs, struct ip_vs_dest *dest, 1066 bool cleanup) 1067 { 1068 ip_vs_stop_estimator(ipvs, &dest->stats); 1069 1070 /* 1071 * Remove it from the d-linked list with the real services. 1072 */ 1073 ip_vs_rs_unhash(dest); 1074 1075 spin_lock_bh(&ipvs->dest_trash_lock); 1076 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, dest->refcnt=%d\n", 1077 IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), 1078 atomic_read(&dest->refcnt)); 1079 if (list_empty(&ipvs->dest_trash) && !cleanup) 1080 mod_timer(&ipvs->dest_trash_timer, 1081 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1082 /* dest lives in trash without reference */ 1083 list_add(&dest->t_list, &ipvs->dest_trash); 1084 dest->idle_start = 0; 1085 spin_unlock_bh(&ipvs->dest_trash_lock); 1086 ip_vs_dest_put(dest); 1087 } 1088 1089 1090 /* 1091 * Unlink a destination from the given service 1092 */ 1093 static void __ip_vs_unlink_dest(struct ip_vs_service *svc, 1094 struct ip_vs_dest *dest, 1095 int svcupd) 1096 { 1097 dest->flags &= ~IP_VS_DEST_F_AVAILABLE; 1098 1099 /* 1100 * Remove it from the d-linked destination list. 1101 */ 1102 list_del_rcu(&dest->n_list); 1103 svc->num_dests--; 1104 1105 if (dest->af != svc->af) 1106 svc->ipvs->mixed_address_family_dests--; 1107 1108 if (svcupd) { 1109 struct ip_vs_scheduler *sched; 1110 1111 sched = rcu_dereference_protected(svc->scheduler, 1); 1112 if (sched && sched->del_dest) 1113 sched->del_dest(svc, dest); 1114 } 1115 } 1116 1117 1118 /* 1119 * Delete a destination server in the given service 1120 */ 1121 static int 1122 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) 1123 { 1124 struct ip_vs_dest *dest; 1125 __be16 dport = udest->port; 1126 1127 EnterFunction(2); 1128 1129 /* We use function that requires RCU lock */ 1130 rcu_read_lock(); 1131 dest = ip_vs_lookup_dest(svc, udest->af, &udest->addr, dport); 1132 rcu_read_unlock(); 1133 1134 if (dest == NULL) { 1135 IP_VS_DBG(1, "%s(): destination not found!\n", __func__); 1136 return -ENOENT; 1137 } 1138 1139 /* 1140 * Unlink dest from the service 1141 */ 1142 __ip_vs_unlink_dest(svc, dest, 1); 1143 1144 /* 1145 * Delete the destination 1146 */ 1147 __ip_vs_del_dest(svc->ipvs, dest, false); 1148 1149 LeaveFunction(2); 1150 1151 return 0; 1152 } 1153 1154 static void ip_vs_dest_trash_expire(unsigned long data) 1155 { 1156 struct netns_ipvs *ipvs = (struct netns_ipvs *)data; 1157 struct ip_vs_dest *dest, *next; 1158 unsigned long now = jiffies; 1159 1160 spin_lock(&ipvs->dest_trash_lock); 1161 list_for_each_entry_safe(dest, next, &ipvs->dest_trash, t_list) { 1162 if (atomic_read(&dest->refcnt) > 0) 1163 continue; 1164 if (dest->idle_start) { 1165 if (time_before(now, dest->idle_start + 1166 IP_VS_DEST_TRASH_PERIOD)) 1167 continue; 1168 } else { 1169 dest->idle_start = max(1UL, now); 1170 continue; 1171 } 1172 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u from trash\n", 1173 dest->vfwmark, 1174 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1175 ntohs(dest->port)); 1176 list_del(&dest->t_list); 1177 ip_vs_dest_free(dest); 1178 } 1179 if (!list_empty(&ipvs->dest_trash)) 1180 mod_timer(&ipvs->dest_trash_timer, 1181 jiffies + (IP_VS_DEST_TRASH_PERIOD >> 1)); 1182 spin_unlock(&ipvs->dest_trash_lock); 1183 } 1184 1185 /* 1186 * Add a service into the service hash table 1187 */ 1188 static int 1189 ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, 1190 struct ip_vs_service **svc_p) 1191 { 1192 int ret = 0, i; 1193 struct ip_vs_scheduler *sched = NULL; 1194 struct ip_vs_pe *pe = NULL; 1195 struct ip_vs_service *svc = NULL; 1196 1197 /* increase the module use count */ 1198 ip_vs_use_count_inc(); 1199 1200 /* Lookup the scheduler by 'u->sched_name' */ 1201 if (strcmp(u->sched_name, "none")) { 1202 sched = ip_vs_scheduler_get(u->sched_name); 1203 if (!sched) { 1204 pr_info("Scheduler module ip_vs_%s not found\n", 1205 u->sched_name); 1206 ret = -ENOENT; 1207 goto out_err; 1208 } 1209 } 1210 1211 if (u->pe_name && *u->pe_name) { 1212 pe = ip_vs_pe_getbyname(u->pe_name); 1213 if (pe == NULL) { 1214 pr_info("persistence engine module ip_vs_pe_%s " 1215 "not found\n", u->pe_name); 1216 ret = -ENOENT; 1217 goto out_err; 1218 } 1219 } 1220 1221 #ifdef CONFIG_IP_VS_IPV6 1222 if (u->af == AF_INET6) { 1223 __u32 plen = (__force __u32) u->netmask; 1224 1225 if (plen < 1 || plen > 128) { 1226 ret = -EINVAL; 1227 goto out_err; 1228 } 1229 } 1230 #endif 1231 1232 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL); 1233 if (svc == NULL) { 1234 IP_VS_DBG(1, "%s(): no memory\n", __func__); 1235 ret = -ENOMEM; 1236 goto out_err; 1237 } 1238 svc->stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 1239 if (!svc->stats.cpustats) { 1240 ret = -ENOMEM; 1241 goto out_err; 1242 } 1243 1244 for_each_possible_cpu(i) { 1245 struct ip_vs_cpu_stats *ip_vs_stats; 1246 ip_vs_stats = per_cpu_ptr(svc->stats.cpustats, i); 1247 u64_stats_init(&ip_vs_stats->syncp); 1248 } 1249 1250 1251 /* I'm the first user of the service */ 1252 atomic_set(&svc->refcnt, 0); 1253 1254 svc->af = u->af; 1255 svc->protocol = u->protocol; 1256 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); 1257 svc->port = u->port; 1258 svc->fwmark = u->fwmark; 1259 svc->flags = u->flags & ~IP_VS_SVC_F_HASHED; 1260 svc->timeout = u->timeout * HZ; 1261 svc->netmask = u->netmask; 1262 svc->ipvs = ipvs; 1263 1264 INIT_LIST_HEAD(&svc->destinations); 1265 spin_lock_init(&svc->sched_lock); 1266 spin_lock_init(&svc->stats.lock); 1267 1268 /* Bind the scheduler */ 1269 if (sched) { 1270 ret = ip_vs_bind_scheduler(svc, sched); 1271 if (ret) 1272 goto out_err; 1273 sched = NULL; 1274 } 1275 1276 /* Bind the ct retriever */ 1277 RCU_INIT_POINTER(svc->pe, pe); 1278 pe = NULL; 1279 1280 /* Update the virtual service counters */ 1281 if (svc->port == FTPPORT) 1282 atomic_inc(&ipvs->ftpsvc_counter); 1283 else if (svc->port == 0) 1284 atomic_inc(&ipvs->nullsvc_counter); 1285 if (svc->pe && svc->pe->conn_out) 1286 atomic_inc(&ipvs->conn_out_counter); 1287 1288 ip_vs_start_estimator(ipvs, &svc->stats); 1289 1290 /* Count only IPv4 services for old get/setsockopt interface */ 1291 if (svc->af == AF_INET) 1292 ipvs->num_services++; 1293 1294 /* Hash the service into the service table */ 1295 ip_vs_svc_hash(svc); 1296 1297 *svc_p = svc; 1298 /* Now there is a service - full throttle */ 1299 ipvs->enable = 1; 1300 return 0; 1301 1302 1303 out_err: 1304 if (svc != NULL) { 1305 ip_vs_unbind_scheduler(svc, sched); 1306 ip_vs_service_free(svc); 1307 } 1308 ip_vs_scheduler_put(sched); 1309 ip_vs_pe_put(pe); 1310 1311 /* decrease the module use count */ 1312 ip_vs_use_count_dec(); 1313 1314 return ret; 1315 } 1316 1317 1318 /* 1319 * Edit a service and bind it with a new scheduler 1320 */ 1321 static int 1322 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) 1323 { 1324 struct ip_vs_scheduler *sched = NULL, *old_sched; 1325 struct ip_vs_pe *pe = NULL, *old_pe = NULL; 1326 int ret = 0; 1327 bool new_pe_conn_out, old_pe_conn_out; 1328 1329 /* 1330 * Lookup the scheduler, by 'u->sched_name' 1331 */ 1332 if (strcmp(u->sched_name, "none")) { 1333 sched = ip_vs_scheduler_get(u->sched_name); 1334 if (!sched) { 1335 pr_info("Scheduler module ip_vs_%s not found\n", 1336 u->sched_name); 1337 return -ENOENT; 1338 } 1339 } 1340 old_sched = sched; 1341 1342 if (u->pe_name && *u->pe_name) { 1343 pe = ip_vs_pe_getbyname(u->pe_name); 1344 if (pe == NULL) { 1345 pr_info("persistence engine module ip_vs_pe_%s " 1346 "not found\n", u->pe_name); 1347 ret = -ENOENT; 1348 goto out; 1349 } 1350 old_pe = pe; 1351 } 1352 1353 #ifdef CONFIG_IP_VS_IPV6 1354 if (u->af == AF_INET6) { 1355 __u32 plen = (__force __u32) u->netmask; 1356 1357 if (plen < 1 || plen > 128) { 1358 ret = -EINVAL; 1359 goto out; 1360 } 1361 } 1362 #endif 1363 1364 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1365 if (sched != old_sched) { 1366 if (old_sched) { 1367 ip_vs_unbind_scheduler(svc, old_sched); 1368 RCU_INIT_POINTER(svc->scheduler, NULL); 1369 /* Wait all svc->sched_data users */ 1370 synchronize_rcu(); 1371 } 1372 /* Bind the new scheduler */ 1373 if (sched) { 1374 ret = ip_vs_bind_scheduler(svc, sched); 1375 if (ret) { 1376 ip_vs_scheduler_put(sched); 1377 goto out; 1378 } 1379 } 1380 } 1381 1382 /* 1383 * Set the flags and timeout value 1384 */ 1385 svc->flags = u->flags | IP_VS_SVC_F_HASHED; 1386 svc->timeout = u->timeout * HZ; 1387 svc->netmask = u->netmask; 1388 1389 old_pe = rcu_dereference_protected(svc->pe, 1); 1390 if (pe != old_pe) { 1391 rcu_assign_pointer(svc->pe, pe); 1392 /* check for optional methods in new pe */ 1393 new_pe_conn_out = (pe && pe->conn_out) ? true : false; 1394 old_pe_conn_out = (old_pe && old_pe->conn_out) ? true : false; 1395 if (new_pe_conn_out && !old_pe_conn_out) 1396 atomic_inc(&svc->ipvs->conn_out_counter); 1397 if (old_pe_conn_out && !new_pe_conn_out) 1398 atomic_dec(&svc->ipvs->conn_out_counter); 1399 } 1400 1401 out: 1402 ip_vs_scheduler_put(old_sched); 1403 ip_vs_pe_put(old_pe); 1404 return ret; 1405 } 1406 1407 /* 1408 * Delete a service from the service list 1409 * - The service must be unlinked, unlocked and not referenced! 1410 * - We are called under _bh lock 1411 */ 1412 static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) 1413 { 1414 struct ip_vs_dest *dest, *nxt; 1415 struct ip_vs_scheduler *old_sched; 1416 struct ip_vs_pe *old_pe; 1417 struct netns_ipvs *ipvs = svc->ipvs; 1418 1419 /* Count only IPv4 services for old get/setsockopt interface */ 1420 if (svc->af == AF_INET) 1421 ipvs->num_services--; 1422 1423 ip_vs_stop_estimator(svc->ipvs, &svc->stats); 1424 1425 /* Unbind scheduler */ 1426 old_sched = rcu_dereference_protected(svc->scheduler, 1); 1427 ip_vs_unbind_scheduler(svc, old_sched); 1428 ip_vs_scheduler_put(old_sched); 1429 1430 /* Unbind persistence engine, keep svc->pe */ 1431 old_pe = rcu_dereference_protected(svc->pe, 1); 1432 if (old_pe && old_pe->conn_out) 1433 atomic_dec(&ipvs->conn_out_counter); 1434 ip_vs_pe_put(old_pe); 1435 1436 /* 1437 * Unlink the whole destination list 1438 */ 1439 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) { 1440 __ip_vs_unlink_dest(svc, dest, 0); 1441 __ip_vs_del_dest(svc->ipvs, dest, cleanup); 1442 } 1443 1444 /* 1445 * Update the virtual service counters 1446 */ 1447 if (svc->port == FTPPORT) 1448 atomic_dec(&ipvs->ftpsvc_counter); 1449 else if (svc->port == 0) 1450 atomic_dec(&ipvs->nullsvc_counter); 1451 1452 /* 1453 * Free the service if nobody refers to it 1454 */ 1455 __ip_vs_svc_put(svc, true); 1456 1457 /* decrease the module use count */ 1458 ip_vs_use_count_dec(); 1459 } 1460 1461 /* 1462 * Unlink a service from list and try to delete it if its refcnt reached 0 1463 */ 1464 static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) 1465 { 1466 /* Hold svc to avoid double release from dest_trash */ 1467 atomic_inc(&svc->refcnt); 1468 /* 1469 * Unhash it from the service table 1470 */ 1471 ip_vs_svc_unhash(svc); 1472 1473 __ip_vs_del_service(svc, cleanup); 1474 } 1475 1476 /* 1477 * Delete a service from the service list 1478 */ 1479 static int ip_vs_del_service(struct ip_vs_service *svc) 1480 { 1481 if (svc == NULL) 1482 return -EEXIST; 1483 ip_vs_unlink_service(svc, false); 1484 1485 return 0; 1486 } 1487 1488 1489 /* 1490 * Flush all the virtual services 1491 */ 1492 static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) 1493 { 1494 int idx; 1495 struct ip_vs_service *svc; 1496 struct hlist_node *n; 1497 1498 /* 1499 * Flush the service table hashed by <netns,protocol,addr,port> 1500 */ 1501 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1502 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], 1503 s_list) { 1504 if (svc->ipvs == ipvs) 1505 ip_vs_unlink_service(svc, cleanup); 1506 } 1507 } 1508 1509 /* 1510 * Flush the service table hashed by fwmark 1511 */ 1512 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1513 hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], 1514 f_list) { 1515 if (svc->ipvs == ipvs) 1516 ip_vs_unlink_service(svc, cleanup); 1517 } 1518 } 1519 1520 return 0; 1521 } 1522 1523 /* 1524 * Delete service by {netns} in the service table. 1525 * Called by __ip_vs_cleanup() 1526 */ 1527 void ip_vs_service_net_cleanup(struct netns_ipvs *ipvs) 1528 { 1529 EnterFunction(2); 1530 /* Check for "full" addressed entries */ 1531 mutex_lock(&__ip_vs_mutex); 1532 ip_vs_flush(ipvs, true); 1533 mutex_unlock(&__ip_vs_mutex); 1534 LeaveFunction(2); 1535 } 1536 1537 /* Put all references for device (dst_cache) */ 1538 static inline void 1539 ip_vs_forget_dev(struct ip_vs_dest *dest, struct net_device *dev) 1540 { 1541 struct ip_vs_dest_dst *dest_dst; 1542 1543 spin_lock_bh(&dest->dst_lock); 1544 dest_dst = rcu_dereference_protected(dest->dest_dst, 1); 1545 if (dest_dst && dest_dst->dst_cache->dev == dev) { 1546 IP_VS_DBG_BUF(3, "Reset dev:%s dest %s:%u ,dest->refcnt=%d\n", 1547 dev->name, 1548 IP_VS_DBG_ADDR(dest->af, &dest->addr), 1549 ntohs(dest->port), 1550 atomic_read(&dest->refcnt)); 1551 __ip_vs_dst_cache_reset(dest); 1552 } 1553 spin_unlock_bh(&dest->dst_lock); 1554 1555 } 1556 /* Netdev event receiver 1557 * Currently only NETDEV_DOWN is handled to release refs to cached dsts 1558 */ 1559 static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, 1560 void *ptr) 1561 { 1562 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 1563 struct net *net = dev_net(dev); 1564 struct netns_ipvs *ipvs = net_ipvs(net); 1565 struct ip_vs_service *svc; 1566 struct ip_vs_dest *dest; 1567 unsigned int idx; 1568 1569 if (event != NETDEV_DOWN || !ipvs) 1570 return NOTIFY_DONE; 1571 IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); 1572 EnterFunction(2); 1573 mutex_lock(&__ip_vs_mutex); 1574 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1575 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1576 if (svc->ipvs == ipvs) { 1577 list_for_each_entry(dest, &svc->destinations, 1578 n_list) { 1579 ip_vs_forget_dev(dest, dev); 1580 } 1581 } 1582 } 1583 1584 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1585 if (svc->ipvs == ipvs) { 1586 list_for_each_entry(dest, &svc->destinations, 1587 n_list) { 1588 ip_vs_forget_dev(dest, dev); 1589 } 1590 } 1591 1592 } 1593 } 1594 1595 spin_lock_bh(&ipvs->dest_trash_lock); 1596 list_for_each_entry(dest, &ipvs->dest_trash, t_list) { 1597 ip_vs_forget_dev(dest, dev); 1598 } 1599 spin_unlock_bh(&ipvs->dest_trash_lock); 1600 mutex_unlock(&__ip_vs_mutex); 1601 LeaveFunction(2); 1602 return NOTIFY_DONE; 1603 } 1604 1605 /* 1606 * Zero counters in a service or all services 1607 */ 1608 static int ip_vs_zero_service(struct ip_vs_service *svc) 1609 { 1610 struct ip_vs_dest *dest; 1611 1612 list_for_each_entry(dest, &svc->destinations, n_list) { 1613 ip_vs_zero_stats(&dest->stats); 1614 } 1615 ip_vs_zero_stats(&svc->stats); 1616 return 0; 1617 } 1618 1619 static int ip_vs_zero_all(struct netns_ipvs *ipvs) 1620 { 1621 int idx; 1622 struct ip_vs_service *svc; 1623 1624 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1625 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 1626 if (svc->ipvs == ipvs) 1627 ip_vs_zero_service(svc); 1628 } 1629 } 1630 1631 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1632 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 1633 if (svc->ipvs == ipvs) 1634 ip_vs_zero_service(svc); 1635 } 1636 } 1637 1638 ip_vs_zero_stats(&ipvs->tot_stats); 1639 return 0; 1640 } 1641 1642 #ifdef CONFIG_SYSCTL 1643 1644 static int zero; 1645 static int three = 3; 1646 1647 static int 1648 proc_do_defense_mode(struct ctl_table *table, int write, 1649 void __user *buffer, size_t *lenp, loff_t *ppos) 1650 { 1651 struct netns_ipvs *ipvs = table->extra2; 1652 int *valp = table->data; 1653 int val = *valp; 1654 int rc; 1655 1656 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1657 if (write && (*valp != val)) { 1658 if ((*valp < 0) || (*valp > 3)) { 1659 /* Restore the correct value */ 1660 *valp = val; 1661 } else { 1662 update_defense_level(ipvs); 1663 } 1664 } 1665 return rc; 1666 } 1667 1668 static int 1669 proc_do_sync_threshold(struct ctl_table *table, int write, 1670 void __user *buffer, size_t *lenp, loff_t *ppos) 1671 { 1672 int *valp = table->data; 1673 int val[2]; 1674 int rc; 1675 1676 /* backup the value first */ 1677 memcpy(val, valp, sizeof(val)); 1678 1679 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1680 if (write && (valp[0] < 0 || valp[1] < 0 || 1681 (valp[0] >= valp[1] && valp[1]))) { 1682 /* Restore the correct value */ 1683 memcpy(valp, val, sizeof(val)); 1684 } 1685 return rc; 1686 } 1687 1688 static int 1689 proc_do_sync_mode(struct ctl_table *table, int write, 1690 void __user *buffer, size_t *lenp, loff_t *ppos) 1691 { 1692 int *valp = table->data; 1693 int val = *valp; 1694 int rc; 1695 1696 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1697 if (write && (*valp != val)) { 1698 if ((*valp < 0) || (*valp > 1)) { 1699 /* Restore the correct value */ 1700 *valp = val; 1701 } 1702 } 1703 return rc; 1704 } 1705 1706 static int 1707 proc_do_sync_ports(struct ctl_table *table, int write, 1708 void __user *buffer, size_t *lenp, loff_t *ppos) 1709 { 1710 int *valp = table->data; 1711 int val = *valp; 1712 int rc; 1713 1714 rc = proc_dointvec(table, write, buffer, lenp, ppos); 1715 if (write && (*valp != val)) { 1716 if (*valp < 1 || !is_power_of_2(*valp)) { 1717 /* Restore the correct value */ 1718 *valp = val; 1719 } 1720 } 1721 return rc; 1722 } 1723 1724 /* 1725 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) 1726 * Do not change order or insert new entries without 1727 * align with netns init in ip_vs_control_net_init() 1728 */ 1729 1730 static struct ctl_table vs_vars[] = { 1731 { 1732 .procname = "amemthresh", 1733 .maxlen = sizeof(int), 1734 .mode = 0644, 1735 .proc_handler = proc_dointvec, 1736 }, 1737 { 1738 .procname = "am_droprate", 1739 .maxlen = sizeof(int), 1740 .mode = 0644, 1741 .proc_handler = proc_dointvec, 1742 }, 1743 { 1744 .procname = "drop_entry", 1745 .maxlen = sizeof(int), 1746 .mode = 0644, 1747 .proc_handler = proc_do_defense_mode, 1748 }, 1749 { 1750 .procname = "drop_packet", 1751 .maxlen = sizeof(int), 1752 .mode = 0644, 1753 .proc_handler = proc_do_defense_mode, 1754 }, 1755 #ifdef CONFIG_IP_VS_NFCT 1756 { 1757 .procname = "conntrack", 1758 .maxlen = sizeof(int), 1759 .mode = 0644, 1760 .proc_handler = &proc_dointvec, 1761 }, 1762 #endif 1763 { 1764 .procname = "secure_tcp", 1765 .maxlen = sizeof(int), 1766 .mode = 0644, 1767 .proc_handler = proc_do_defense_mode, 1768 }, 1769 { 1770 .procname = "snat_reroute", 1771 .maxlen = sizeof(int), 1772 .mode = 0644, 1773 .proc_handler = &proc_dointvec, 1774 }, 1775 { 1776 .procname = "sync_version", 1777 .maxlen = sizeof(int), 1778 .mode = 0644, 1779 .proc_handler = &proc_do_sync_mode, 1780 }, 1781 { 1782 .procname = "sync_ports", 1783 .maxlen = sizeof(int), 1784 .mode = 0644, 1785 .proc_handler = &proc_do_sync_ports, 1786 }, 1787 { 1788 .procname = "sync_persist_mode", 1789 .maxlen = sizeof(int), 1790 .mode = 0644, 1791 .proc_handler = proc_dointvec, 1792 }, 1793 { 1794 .procname = "sync_qlen_max", 1795 .maxlen = sizeof(unsigned long), 1796 .mode = 0644, 1797 .proc_handler = proc_doulongvec_minmax, 1798 }, 1799 { 1800 .procname = "sync_sock_size", 1801 .maxlen = sizeof(int), 1802 .mode = 0644, 1803 .proc_handler = proc_dointvec, 1804 }, 1805 { 1806 .procname = "cache_bypass", 1807 .maxlen = sizeof(int), 1808 .mode = 0644, 1809 .proc_handler = proc_dointvec, 1810 }, 1811 { 1812 .procname = "expire_nodest_conn", 1813 .maxlen = sizeof(int), 1814 .mode = 0644, 1815 .proc_handler = proc_dointvec, 1816 }, 1817 { 1818 .procname = "sloppy_tcp", 1819 .maxlen = sizeof(int), 1820 .mode = 0644, 1821 .proc_handler = proc_dointvec, 1822 }, 1823 { 1824 .procname = "sloppy_sctp", 1825 .maxlen = sizeof(int), 1826 .mode = 0644, 1827 .proc_handler = proc_dointvec, 1828 }, 1829 { 1830 .procname = "expire_quiescent_template", 1831 .maxlen = sizeof(int), 1832 .mode = 0644, 1833 .proc_handler = proc_dointvec, 1834 }, 1835 { 1836 .procname = "sync_threshold", 1837 .maxlen = 1838 sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold), 1839 .mode = 0644, 1840 .proc_handler = proc_do_sync_threshold, 1841 }, 1842 { 1843 .procname = "sync_refresh_period", 1844 .maxlen = sizeof(int), 1845 .mode = 0644, 1846 .proc_handler = proc_dointvec_jiffies, 1847 }, 1848 { 1849 .procname = "sync_retries", 1850 .maxlen = sizeof(int), 1851 .mode = 0644, 1852 .proc_handler = proc_dointvec_minmax, 1853 .extra1 = &zero, 1854 .extra2 = &three, 1855 }, 1856 { 1857 .procname = "nat_icmp_send", 1858 .maxlen = sizeof(int), 1859 .mode = 0644, 1860 .proc_handler = proc_dointvec, 1861 }, 1862 { 1863 .procname = "pmtu_disc", 1864 .maxlen = sizeof(int), 1865 .mode = 0644, 1866 .proc_handler = proc_dointvec, 1867 }, 1868 { 1869 .procname = "backup_only", 1870 .maxlen = sizeof(int), 1871 .mode = 0644, 1872 .proc_handler = proc_dointvec, 1873 }, 1874 { 1875 .procname = "conn_reuse_mode", 1876 .maxlen = sizeof(int), 1877 .mode = 0644, 1878 .proc_handler = proc_dointvec, 1879 }, 1880 { 1881 .procname = "schedule_icmp", 1882 .maxlen = sizeof(int), 1883 .mode = 0644, 1884 .proc_handler = proc_dointvec, 1885 }, 1886 { 1887 .procname = "ignore_tunneled", 1888 .maxlen = sizeof(int), 1889 .mode = 0644, 1890 .proc_handler = proc_dointvec, 1891 }, 1892 #ifdef CONFIG_IP_VS_DEBUG 1893 { 1894 .procname = "debug_level", 1895 .data = &sysctl_ip_vs_debug_level, 1896 .maxlen = sizeof(int), 1897 .mode = 0644, 1898 .proc_handler = proc_dointvec, 1899 }, 1900 #endif 1901 { } 1902 }; 1903 1904 #endif 1905 1906 #ifdef CONFIG_PROC_FS 1907 1908 struct ip_vs_iter { 1909 struct seq_net_private p; /* Do not move this, netns depends upon it*/ 1910 struct hlist_head *table; 1911 int bucket; 1912 }; 1913 1914 /* 1915 * Write the contents of the VS rule table to a PROCfs file. 1916 * (It is kept just for backward compatibility) 1917 */ 1918 static inline const char *ip_vs_fwd_name(unsigned int flags) 1919 { 1920 switch (flags & IP_VS_CONN_F_FWD_MASK) { 1921 case IP_VS_CONN_F_LOCALNODE: 1922 return "Local"; 1923 case IP_VS_CONN_F_TUNNEL: 1924 return "Tunnel"; 1925 case IP_VS_CONN_F_DROUTE: 1926 return "Route"; 1927 default: 1928 return "Masq"; 1929 } 1930 } 1931 1932 1933 /* Get the Nth entry in the two lists */ 1934 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) 1935 { 1936 struct net *net = seq_file_net(seq); 1937 struct netns_ipvs *ipvs = net_ipvs(net); 1938 struct ip_vs_iter *iter = seq->private; 1939 int idx; 1940 struct ip_vs_service *svc; 1941 1942 /* look in hash by protocol */ 1943 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1944 hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { 1945 if ((svc->ipvs == ipvs) && pos-- == 0) { 1946 iter->table = ip_vs_svc_table; 1947 iter->bucket = idx; 1948 return svc; 1949 } 1950 } 1951 } 1952 1953 /* keep looking in fwmark */ 1954 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 1955 hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], 1956 f_list) { 1957 if ((svc->ipvs == ipvs) && pos-- == 0) { 1958 iter->table = ip_vs_svc_fwm_table; 1959 iter->bucket = idx; 1960 return svc; 1961 } 1962 } 1963 } 1964 1965 return NULL; 1966 } 1967 1968 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) 1969 __acquires(RCU) 1970 { 1971 rcu_read_lock(); 1972 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; 1973 } 1974 1975 1976 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1977 { 1978 struct hlist_node *e; 1979 struct ip_vs_iter *iter; 1980 struct ip_vs_service *svc; 1981 1982 ++*pos; 1983 if (v == SEQ_START_TOKEN) 1984 return ip_vs_info_array(seq,0); 1985 1986 svc = v; 1987 iter = seq->private; 1988 1989 if (iter->table == ip_vs_svc_table) { 1990 /* next service in table hashed by protocol */ 1991 e = rcu_dereference(hlist_next_rcu(&svc->s_list)); 1992 if (e) 1993 return hlist_entry(e, struct ip_vs_service, s_list); 1994 1995 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 1996 hlist_for_each_entry_rcu(svc, 1997 &ip_vs_svc_table[iter->bucket], 1998 s_list) { 1999 return svc; 2000 } 2001 } 2002 2003 iter->table = ip_vs_svc_fwm_table; 2004 iter->bucket = -1; 2005 goto scan_fwmark; 2006 } 2007 2008 /* next service in hashed by fwmark */ 2009 e = rcu_dereference(hlist_next_rcu(&svc->f_list)); 2010 if (e) 2011 return hlist_entry(e, struct ip_vs_service, f_list); 2012 2013 scan_fwmark: 2014 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { 2015 hlist_for_each_entry_rcu(svc, 2016 &ip_vs_svc_fwm_table[iter->bucket], 2017 f_list) 2018 return svc; 2019 } 2020 2021 return NULL; 2022 } 2023 2024 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) 2025 __releases(RCU) 2026 { 2027 rcu_read_unlock(); 2028 } 2029 2030 2031 static int ip_vs_info_seq_show(struct seq_file *seq, void *v) 2032 { 2033 if (v == SEQ_START_TOKEN) { 2034 seq_printf(seq, 2035 "IP Virtual Server version %d.%d.%d (size=%d)\n", 2036 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2037 seq_puts(seq, 2038 "Prot LocalAddress:Port Scheduler Flags\n"); 2039 seq_puts(seq, 2040 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); 2041 } else { 2042 struct net *net = seq_file_net(seq); 2043 struct netns_ipvs *ipvs = net_ipvs(net); 2044 const struct ip_vs_service *svc = v; 2045 const struct ip_vs_iter *iter = seq->private; 2046 const struct ip_vs_dest *dest; 2047 struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); 2048 char *sched_name = sched ? sched->name : "none"; 2049 2050 if (svc->ipvs != ipvs) 2051 return 0; 2052 if (iter->table == ip_vs_svc_table) { 2053 #ifdef CONFIG_IP_VS_IPV6 2054 if (svc->af == AF_INET6) 2055 seq_printf(seq, "%s [%pI6]:%04X %s ", 2056 ip_vs_proto_name(svc->protocol), 2057 &svc->addr.in6, 2058 ntohs(svc->port), 2059 sched_name); 2060 else 2061 #endif 2062 seq_printf(seq, "%s %08X:%04X %s %s ", 2063 ip_vs_proto_name(svc->protocol), 2064 ntohl(svc->addr.ip), 2065 ntohs(svc->port), 2066 sched_name, 2067 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2068 } else { 2069 seq_printf(seq, "FWM %08X %s %s", 2070 svc->fwmark, sched_name, 2071 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":""); 2072 } 2073 2074 if (svc->flags & IP_VS_SVC_F_PERSISTENT) 2075 seq_printf(seq, "persistent %d %08X\n", 2076 svc->timeout, 2077 ntohl(svc->netmask)); 2078 else 2079 seq_putc(seq, '\n'); 2080 2081 list_for_each_entry_rcu(dest, &svc->destinations, n_list) { 2082 #ifdef CONFIG_IP_VS_IPV6 2083 if (dest->af == AF_INET6) 2084 seq_printf(seq, 2085 " -> [%pI6]:%04X" 2086 " %-7s %-6d %-10d %-10d\n", 2087 &dest->addr.in6, 2088 ntohs(dest->port), 2089 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2090 atomic_read(&dest->weight), 2091 atomic_read(&dest->activeconns), 2092 atomic_read(&dest->inactconns)); 2093 else 2094 #endif 2095 seq_printf(seq, 2096 " -> %08X:%04X " 2097 "%-7s %-6d %-10d %-10d\n", 2098 ntohl(dest->addr.ip), 2099 ntohs(dest->port), 2100 ip_vs_fwd_name(atomic_read(&dest->conn_flags)), 2101 atomic_read(&dest->weight), 2102 atomic_read(&dest->activeconns), 2103 atomic_read(&dest->inactconns)); 2104 2105 } 2106 } 2107 return 0; 2108 } 2109 2110 static const struct seq_operations ip_vs_info_seq_ops = { 2111 .start = ip_vs_info_seq_start, 2112 .next = ip_vs_info_seq_next, 2113 .stop = ip_vs_info_seq_stop, 2114 .show = ip_vs_info_seq_show, 2115 }; 2116 2117 static int ip_vs_info_open(struct inode *inode, struct file *file) 2118 { 2119 return seq_open_net(inode, file, &ip_vs_info_seq_ops, 2120 sizeof(struct ip_vs_iter)); 2121 } 2122 2123 static const struct file_operations ip_vs_info_fops = { 2124 .owner = THIS_MODULE, 2125 .open = ip_vs_info_open, 2126 .read = seq_read, 2127 .llseek = seq_lseek, 2128 .release = seq_release_net, 2129 }; 2130 2131 static int ip_vs_stats_show(struct seq_file *seq, void *v) 2132 { 2133 struct net *net = seq_file_single_net(seq); 2134 struct ip_vs_kstats show; 2135 2136 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2137 seq_puts(seq, 2138 " Total Incoming Outgoing Incoming Outgoing\n"); 2139 seq_printf(seq, 2140 " Conns Packets Packets Bytes Bytes\n"); 2141 2142 ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats); 2143 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n", 2144 (unsigned long long)show.conns, 2145 (unsigned long long)show.inpkts, 2146 (unsigned long long)show.outpkts, 2147 (unsigned long long)show.inbytes, 2148 (unsigned long long)show.outbytes); 2149 2150 /* 01234567 01234567 01234567 0123456701234567 0123456701234567*/ 2151 seq_puts(seq, 2152 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2153 seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n", 2154 (unsigned long long)show.cps, 2155 (unsigned long long)show.inpps, 2156 (unsigned long long)show.outpps, 2157 (unsigned long long)show.inbps, 2158 (unsigned long long)show.outbps); 2159 2160 return 0; 2161 } 2162 2163 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file) 2164 { 2165 return single_open_net(inode, file, ip_vs_stats_show); 2166 } 2167 2168 static const struct file_operations ip_vs_stats_fops = { 2169 .owner = THIS_MODULE, 2170 .open = ip_vs_stats_seq_open, 2171 .read = seq_read, 2172 .llseek = seq_lseek, 2173 .release = single_release_net, 2174 }; 2175 2176 static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) 2177 { 2178 struct net *net = seq_file_single_net(seq); 2179 struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats; 2180 struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats; 2181 struct ip_vs_kstats kstats; 2182 int i; 2183 2184 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2185 seq_puts(seq, 2186 " Total Incoming Outgoing Incoming Outgoing\n"); 2187 seq_printf(seq, 2188 "CPU Conns Packets Packets Bytes Bytes\n"); 2189 2190 for_each_possible_cpu(i) { 2191 struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i); 2192 unsigned int start; 2193 u64 conns, inpkts, outpkts, inbytes, outbytes; 2194 2195 do { 2196 start = u64_stats_fetch_begin_irq(&u->syncp); 2197 conns = u->cnt.conns; 2198 inpkts = u->cnt.inpkts; 2199 outpkts = u->cnt.outpkts; 2200 inbytes = u->cnt.inbytes; 2201 outbytes = u->cnt.outbytes; 2202 } while (u64_stats_fetch_retry_irq(&u->syncp, start)); 2203 2204 seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n", 2205 i, (u64)conns, (u64)inpkts, 2206 (u64)outpkts, (u64)inbytes, 2207 (u64)outbytes); 2208 } 2209 2210 ip_vs_copy_stats(&kstats, tot_stats); 2211 2212 seq_printf(seq, " ~ %8LX %8LX %8LX %16LX %16LX\n\n", 2213 (unsigned long long)kstats.conns, 2214 (unsigned long long)kstats.inpkts, 2215 (unsigned long long)kstats.outpkts, 2216 (unsigned long long)kstats.inbytes, 2217 (unsigned long long)kstats.outbytes); 2218 2219 /* ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */ 2220 seq_puts(seq, 2221 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); 2222 seq_printf(seq, " %8LX %8LX %8LX %16LX %16LX\n", 2223 kstats.cps, 2224 kstats.inpps, 2225 kstats.outpps, 2226 kstats.inbps, 2227 kstats.outbps); 2228 2229 return 0; 2230 } 2231 2232 static int ip_vs_stats_percpu_seq_open(struct inode *inode, struct file *file) 2233 { 2234 return single_open_net(inode, file, ip_vs_stats_percpu_show); 2235 } 2236 2237 static const struct file_operations ip_vs_stats_percpu_fops = { 2238 .owner = THIS_MODULE, 2239 .open = ip_vs_stats_percpu_seq_open, 2240 .read = seq_read, 2241 .llseek = seq_lseek, 2242 .release = single_release_net, 2243 }; 2244 #endif 2245 2246 /* 2247 * Set timeout values for tcp tcpfin udp in the timeout_table. 2248 */ 2249 static int ip_vs_set_timeout(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2250 { 2251 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2252 struct ip_vs_proto_data *pd; 2253 #endif 2254 2255 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n", 2256 u->tcp_timeout, 2257 u->tcp_fin_timeout, 2258 u->udp_timeout); 2259 2260 #ifdef CONFIG_IP_VS_PROTO_TCP 2261 if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || 2262 u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { 2263 return -EINVAL; 2264 } 2265 #endif 2266 2267 #ifdef CONFIG_IP_VS_PROTO_UDP 2268 if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) 2269 return -EINVAL; 2270 #endif 2271 2272 #ifdef CONFIG_IP_VS_PROTO_TCP 2273 if (u->tcp_timeout) { 2274 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2275 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] 2276 = u->tcp_timeout * HZ; 2277 } 2278 2279 if (u->tcp_fin_timeout) { 2280 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2281 pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] 2282 = u->tcp_fin_timeout * HZ; 2283 } 2284 #endif 2285 2286 #ifdef CONFIG_IP_VS_PROTO_UDP 2287 if (u->udp_timeout) { 2288 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2289 pd->timeout_table[IP_VS_UDP_S_NORMAL] 2290 = u->udp_timeout * HZ; 2291 } 2292 #endif 2293 return 0; 2294 } 2295 2296 #define CMDID(cmd) (cmd - IP_VS_BASE_CTL) 2297 2298 struct ip_vs_svcdest_user { 2299 struct ip_vs_service_user s; 2300 struct ip_vs_dest_user d; 2301 }; 2302 2303 static const unsigned char set_arglen[CMDID(IP_VS_SO_SET_MAX) + 1] = { 2304 [CMDID(IP_VS_SO_SET_ADD)] = sizeof(struct ip_vs_service_user), 2305 [CMDID(IP_VS_SO_SET_EDIT)] = sizeof(struct ip_vs_service_user), 2306 [CMDID(IP_VS_SO_SET_DEL)] = sizeof(struct ip_vs_service_user), 2307 [CMDID(IP_VS_SO_SET_ADDDEST)] = sizeof(struct ip_vs_svcdest_user), 2308 [CMDID(IP_VS_SO_SET_DELDEST)] = sizeof(struct ip_vs_svcdest_user), 2309 [CMDID(IP_VS_SO_SET_EDITDEST)] = sizeof(struct ip_vs_svcdest_user), 2310 [CMDID(IP_VS_SO_SET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2311 [CMDID(IP_VS_SO_SET_STARTDAEMON)] = sizeof(struct ip_vs_daemon_user), 2312 [CMDID(IP_VS_SO_SET_STOPDAEMON)] = sizeof(struct ip_vs_daemon_user), 2313 [CMDID(IP_VS_SO_SET_ZERO)] = sizeof(struct ip_vs_service_user), 2314 }; 2315 2316 union ip_vs_set_arglen { 2317 struct ip_vs_service_user field_IP_VS_SO_SET_ADD; 2318 struct ip_vs_service_user field_IP_VS_SO_SET_EDIT; 2319 struct ip_vs_service_user field_IP_VS_SO_SET_DEL; 2320 struct ip_vs_svcdest_user field_IP_VS_SO_SET_ADDDEST; 2321 struct ip_vs_svcdest_user field_IP_VS_SO_SET_DELDEST; 2322 struct ip_vs_svcdest_user field_IP_VS_SO_SET_EDITDEST; 2323 struct ip_vs_timeout_user field_IP_VS_SO_SET_TIMEOUT; 2324 struct ip_vs_daemon_user field_IP_VS_SO_SET_STARTDAEMON; 2325 struct ip_vs_daemon_user field_IP_VS_SO_SET_STOPDAEMON; 2326 struct ip_vs_service_user field_IP_VS_SO_SET_ZERO; 2327 }; 2328 2329 #define MAX_SET_ARGLEN sizeof(union ip_vs_set_arglen) 2330 2331 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, 2332 struct ip_vs_service_user *usvc_compat) 2333 { 2334 memset(usvc, 0, sizeof(*usvc)); 2335 2336 usvc->af = AF_INET; 2337 usvc->protocol = usvc_compat->protocol; 2338 usvc->addr.ip = usvc_compat->addr; 2339 usvc->port = usvc_compat->port; 2340 usvc->fwmark = usvc_compat->fwmark; 2341 2342 /* Deep copy of sched_name is not needed here */ 2343 usvc->sched_name = usvc_compat->sched_name; 2344 2345 usvc->flags = usvc_compat->flags; 2346 usvc->timeout = usvc_compat->timeout; 2347 usvc->netmask = usvc_compat->netmask; 2348 } 2349 2350 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, 2351 struct ip_vs_dest_user *udest_compat) 2352 { 2353 memset(udest, 0, sizeof(*udest)); 2354 2355 udest->addr.ip = udest_compat->addr; 2356 udest->port = udest_compat->port; 2357 udest->conn_flags = udest_compat->conn_flags; 2358 udest->weight = udest_compat->weight; 2359 udest->u_threshold = udest_compat->u_threshold; 2360 udest->l_threshold = udest_compat->l_threshold; 2361 udest->af = AF_INET; 2362 } 2363 2364 static int 2365 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) 2366 { 2367 struct net *net = sock_net(sk); 2368 int ret; 2369 unsigned char arg[MAX_SET_ARGLEN]; 2370 struct ip_vs_service_user *usvc_compat; 2371 struct ip_vs_service_user_kern usvc; 2372 struct ip_vs_service *svc; 2373 struct ip_vs_dest_user *udest_compat; 2374 struct ip_vs_dest_user_kern udest; 2375 struct netns_ipvs *ipvs = net_ipvs(net); 2376 2377 BUILD_BUG_ON(sizeof(arg) > 255); 2378 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2379 return -EPERM; 2380 2381 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX) 2382 return -EINVAL; 2383 if (len != set_arglen[CMDID(cmd)]) { 2384 IP_VS_DBG(1, "set_ctl: len %u != %u\n", 2385 len, set_arglen[CMDID(cmd)]); 2386 return -EINVAL; 2387 } 2388 2389 if (copy_from_user(arg, user, len) != 0) 2390 return -EFAULT; 2391 2392 /* increase the module use count */ 2393 ip_vs_use_count_inc(); 2394 2395 /* Handle daemons since they have another lock */ 2396 if (cmd == IP_VS_SO_SET_STARTDAEMON || 2397 cmd == IP_VS_SO_SET_STOPDAEMON) { 2398 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg; 2399 2400 if (cmd == IP_VS_SO_SET_STARTDAEMON) { 2401 struct ipvs_sync_daemon_cfg cfg; 2402 2403 memset(&cfg, 0, sizeof(cfg)); 2404 ret = -EINVAL; 2405 if (strscpy(cfg.mcast_ifn, dm->mcast_ifn, 2406 sizeof(cfg.mcast_ifn)) <= 0) 2407 goto out_dec; 2408 cfg.syncid = dm->syncid; 2409 ret = start_sync_thread(ipvs, &cfg, dm->state); 2410 } else { 2411 mutex_lock(&ipvs->sync_mutex); 2412 ret = stop_sync_thread(ipvs, dm->state); 2413 mutex_unlock(&ipvs->sync_mutex); 2414 } 2415 goto out_dec; 2416 } 2417 2418 mutex_lock(&__ip_vs_mutex); 2419 if (cmd == IP_VS_SO_SET_FLUSH) { 2420 /* Flush the virtual service */ 2421 ret = ip_vs_flush(ipvs, false); 2422 goto out_unlock; 2423 } else if (cmd == IP_VS_SO_SET_TIMEOUT) { 2424 /* Set timeout values for (tcp tcpfin udp) */ 2425 ret = ip_vs_set_timeout(ipvs, (struct ip_vs_timeout_user *)arg); 2426 goto out_unlock; 2427 } else if (!len) { 2428 /* No more commands with len == 0 below */ 2429 ret = -EINVAL; 2430 goto out_unlock; 2431 } 2432 2433 usvc_compat = (struct ip_vs_service_user *)arg; 2434 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); 2435 2436 /* We only use the new structs internally, so copy userspace compat 2437 * structs to extended internal versions */ 2438 ip_vs_copy_usvc_compat(&usvc, usvc_compat); 2439 ip_vs_copy_udest_compat(&udest, udest_compat); 2440 2441 if (cmd == IP_VS_SO_SET_ZERO) { 2442 /* if no service address is set, zero counters in all */ 2443 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { 2444 ret = ip_vs_zero_all(ipvs); 2445 goto out_unlock; 2446 } 2447 } 2448 2449 if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) && 2450 strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) == 2451 IP_VS_SCHEDNAME_MAXLEN) { 2452 ret = -EINVAL; 2453 goto out_unlock; 2454 } 2455 2456 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */ 2457 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP && 2458 usvc.protocol != IPPROTO_SCTP) { 2459 pr_err("set_ctl: invalid protocol: %d %pI4:%d\n", 2460 usvc.protocol, &usvc.addr.ip, 2461 ntohs(usvc.port)); 2462 ret = -EFAULT; 2463 goto out_unlock; 2464 } 2465 2466 /* Lookup the exact service by <protocol, addr, port> or fwmark */ 2467 rcu_read_lock(); 2468 if (usvc.fwmark == 0) 2469 svc = __ip_vs_service_find(ipvs, usvc.af, usvc.protocol, 2470 &usvc.addr, usvc.port); 2471 else 2472 svc = __ip_vs_svc_fwm_find(ipvs, usvc.af, usvc.fwmark); 2473 rcu_read_unlock(); 2474 2475 if (cmd != IP_VS_SO_SET_ADD 2476 && (svc == NULL || svc->protocol != usvc.protocol)) { 2477 ret = -ESRCH; 2478 goto out_unlock; 2479 } 2480 2481 switch (cmd) { 2482 case IP_VS_SO_SET_ADD: 2483 if (svc != NULL) 2484 ret = -EEXIST; 2485 else 2486 ret = ip_vs_add_service(ipvs, &usvc, &svc); 2487 break; 2488 case IP_VS_SO_SET_EDIT: 2489 ret = ip_vs_edit_service(svc, &usvc); 2490 break; 2491 case IP_VS_SO_SET_DEL: 2492 ret = ip_vs_del_service(svc); 2493 if (!ret) 2494 goto out_unlock; 2495 break; 2496 case IP_VS_SO_SET_ZERO: 2497 ret = ip_vs_zero_service(svc); 2498 break; 2499 case IP_VS_SO_SET_ADDDEST: 2500 ret = ip_vs_add_dest(svc, &udest); 2501 break; 2502 case IP_VS_SO_SET_EDITDEST: 2503 ret = ip_vs_edit_dest(svc, &udest); 2504 break; 2505 case IP_VS_SO_SET_DELDEST: 2506 ret = ip_vs_del_dest(svc, &udest); 2507 } 2508 2509 out_unlock: 2510 mutex_unlock(&__ip_vs_mutex); 2511 out_dec: 2512 /* decrease the module use count */ 2513 ip_vs_use_count_dec(); 2514 2515 return ret; 2516 } 2517 2518 2519 static void 2520 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) 2521 { 2522 struct ip_vs_scheduler *sched; 2523 struct ip_vs_kstats kstats; 2524 char *sched_name; 2525 2526 sched = rcu_dereference_protected(src->scheduler, 1); 2527 sched_name = sched ? sched->name : "none"; 2528 dst->protocol = src->protocol; 2529 dst->addr = src->addr.ip; 2530 dst->port = src->port; 2531 dst->fwmark = src->fwmark; 2532 strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); 2533 dst->flags = src->flags; 2534 dst->timeout = src->timeout / HZ; 2535 dst->netmask = src->netmask; 2536 dst->num_dests = src->num_dests; 2537 ip_vs_copy_stats(&kstats, &src->stats); 2538 ip_vs_export_stats_user(&dst->stats, &kstats); 2539 } 2540 2541 static inline int 2542 __ip_vs_get_service_entries(struct netns_ipvs *ipvs, 2543 const struct ip_vs_get_services *get, 2544 struct ip_vs_get_services __user *uptr) 2545 { 2546 int idx, count=0; 2547 struct ip_vs_service *svc; 2548 struct ip_vs_service_entry entry; 2549 int ret = 0; 2550 2551 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2552 hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { 2553 /* Only expose IPv4 entries to old interface */ 2554 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2555 continue; 2556 2557 if (count >= get->num_services) 2558 goto out; 2559 memset(&entry, 0, sizeof(entry)); 2560 ip_vs_copy_service(&entry, svc); 2561 if (copy_to_user(&uptr->entrytable[count], 2562 &entry, sizeof(entry))) { 2563 ret = -EFAULT; 2564 goto out; 2565 } 2566 count++; 2567 } 2568 } 2569 2570 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 2571 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { 2572 /* Only expose IPv4 entries to old interface */ 2573 if (svc->af != AF_INET || (svc->ipvs != ipvs)) 2574 continue; 2575 2576 if (count >= get->num_services) 2577 goto out; 2578 memset(&entry, 0, sizeof(entry)); 2579 ip_vs_copy_service(&entry, svc); 2580 if (copy_to_user(&uptr->entrytable[count], 2581 &entry, sizeof(entry))) { 2582 ret = -EFAULT; 2583 goto out; 2584 } 2585 count++; 2586 } 2587 } 2588 out: 2589 return ret; 2590 } 2591 2592 static inline int 2593 __ip_vs_get_dest_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_dests *get, 2594 struct ip_vs_get_dests __user *uptr) 2595 { 2596 struct ip_vs_service *svc; 2597 union nf_inet_addr addr = { .ip = get->addr }; 2598 int ret = 0; 2599 2600 rcu_read_lock(); 2601 if (get->fwmark) 2602 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, get->fwmark); 2603 else 2604 svc = __ip_vs_service_find(ipvs, AF_INET, get->protocol, &addr, 2605 get->port); 2606 rcu_read_unlock(); 2607 2608 if (svc) { 2609 int count = 0; 2610 struct ip_vs_dest *dest; 2611 struct ip_vs_dest_entry entry; 2612 struct ip_vs_kstats kstats; 2613 2614 memset(&entry, 0, sizeof(entry)); 2615 list_for_each_entry(dest, &svc->destinations, n_list) { 2616 if (count >= get->num_dests) 2617 break; 2618 2619 /* Cannot expose heterogeneous members via sockopt 2620 * interface 2621 */ 2622 if (dest->af != svc->af) 2623 continue; 2624 2625 entry.addr = dest->addr.ip; 2626 entry.port = dest->port; 2627 entry.conn_flags = atomic_read(&dest->conn_flags); 2628 entry.weight = atomic_read(&dest->weight); 2629 entry.u_threshold = dest->u_threshold; 2630 entry.l_threshold = dest->l_threshold; 2631 entry.activeconns = atomic_read(&dest->activeconns); 2632 entry.inactconns = atomic_read(&dest->inactconns); 2633 entry.persistconns = atomic_read(&dest->persistconns); 2634 ip_vs_copy_stats(&kstats, &dest->stats); 2635 ip_vs_export_stats_user(&entry.stats, &kstats); 2636 if (copy_to_user(&uptr->entrytable[count], 2637 &entry, sizeof(entry))) { 2638 ret = -EFAULT; 2639 break; 2640 } 2641 count++; 2642 } 2643 } else 2644 ret = -ESRCH; 2645 return ret; 2646 } 2647 2648 static inline void 2649 __ip_vs_get_timeouts(struct netns_ipvs *ipvs, struct ip_vs_timeout_user *u) 2650 { 2651 #if defined(CONFIG_IP_VS_PROTO_TCP) || defined(CONFIG_IP_VS_PROTO_UDP) 2652 struct ip_vs_proto_data *pd; 2653 #endif 2654 2655 memset(u, 0, sizeof (*u)); 2656 2657 #ifdef CONFIG_IP_VS_PROTO_TCP 2658 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2659 u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ; 2660 u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ; 2661 #endif 2662 #ifdef CONFIG_IP_VS_PROTO_UDP 2663 pd = ip_vs_proto_data_get(ipvs, IPPROTO_UDP); 2664 u->udp_timeout = 2665 pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ; 2666 #endif 2667 } 2668 2669 static const unsigned char get_arglen[CMDID(IP_VS_SO_GET_MAX) + 1] = { 2670 [CMDID(IP_VS_SO_GET_VERSION)] = 64, 2671 [CMDID(IP_VS_SO_GET_INFO)] = sizeof(struct ip_vs_getinfo), 2672 [CMDID(IP_VS_SO_GET_SERVICES)] = sizeof(struct ip_vs_get_services), 2673 [CMDID(IP_VS_SO_GET_SERVICE)] = sizeof(struct ip_vs_service_entry), 2674 [CMDID(IP_VS_SO_GET_DESTS)] = sizeof(struct ip_vs_get_dests), 2675 [CMDID(IP_VS_SO_GET_TIMEOUT)] = sizeof(struct ip_vs_timeout_user), 2676 [CMDID(IP_VS_SO_GET_DAEMON)] = 2 * sizeof(struct ip_vs_daemon_user), 2677 }; 2678 2679 union ip_vs_get_arglen { 2680 char field_IP_VS_SO_GET_VERSION[64]; 2681 struct ip_vs_getinfo field_IP_VS_SO_GET_INFO; 2682 struct ip_vs_get_services field_IP_VS_SO_GET_SERVICES; 2683 struct ip_vs_service_entry field_IP_VS_SO_GET_SERVICE; 2684 struct ip_vs_get_dests field_IP_VS_SO_GET_DESTS; 2685 struct ip_vs_timeout_user field_IP_VS_SO_GET_TIMEOUT; 2686 struct ip_vs_daemon_user field_IP_VS_SO_GET_DAEMON[2]; 2687 }; 2688 2689 #define MAX_GET_ARGLEN sizeof(union ip_vs_get_arglen) 2690 2691 static int 2692 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) 2693 { 2694 unsigned char arg[MAX_GET_ARGLEN]; 2695 int ret = 0; 2696 unsigned int copylen; 2697 struct net *net = sock_net(sk); 2698 struct netns_ipvs *ipvs = net_ipvs(net); 2699 2700 BUG_ON(!net); 2701 BUILD_BUG_ON(sizeof(arg) > 255); 2702 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) 2703 return -EPERM; 2704 2705 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX) 2706 return -EINVAL; 2707 2708 copylen = get_arglen[CMDID(cmd)]; 2709 if (*len < (int) copylen) { 2710 IP_VS_DBG(1, "get_ctl: len %d < %u\n", *len, copylen); 2711 return -EINVAL; 2712 } 2713 2714 if (copy_from_user(arg, user, copylen) != 0) 2715 return -EFAULT; 2716 /* 2717 * Handle daemons first since it has its own locking 2718 */ 2719 if (cmd == IP_VS_SO_GET_DAEMON) { 2720 struct ip_vs_daemon_user d[2]; 2721 2722 memset(&d, 0, sizeof(d)); 2723 mutex_lock(&ipvs->sync_mutex); 2724 if (ipvs->sync_state & IP_VS_STATE_MASTER) { 2725 d[0].state = IP_VS_STATE_MASTER; 2726 strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, 2727 sizeof(d[0].mcast_ifn)); 2728 d[0].syncid = ipvs->mcfg.syncid; 2729 } 2730 if (ipvs->sync_state & IP_VS_STATE_BACKUP) { 2731 d[1].state = IP_VS_STATE_BACKUP; 2732 strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, 2733 sizeof(d[1].mcast_ifn)); 2734 d[1].syncid = ipvs->bcfg.syncid; 2735 } 2736 if (copy_to_user(user, &d, sizeof(d)) != 0) 2737 ret = -EFAULT; 2738 mutex_unlock(&ipvs->sync_mutex); 2739 return ret; 2740 } 2741 2742 mutex_lock(&__ip_vs_mutex); 2743 switch (cmd) { 2744 case IP_VS_SO_GET_VERSION: 2745 { 2746 char buf[64]; 2747 2748 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", 2749 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); 2750 if (copy_to_user(user, buf, strlen(buf)+1) != 0) { 2751 ret = -EFAULT; 2752 goto out; 2753 } 2754 *len = strlen(buf)+1; 2755 } 2756 break; 2757 2758 case IP_VS_SO_GET_INFO: 2759 { 2760 struct ip_vs_getinfo info; 2761 info.version = IP_VS_VERSION_CODE; 2762 info.size = ip_vs_conn_tab_size; 2763 info.num_services = ipvs->num_services; 2764 if (copy_to_user(user, &info, sizeof(info)) != 0) 2765 ret = -EFAULT; 2766 } 2767 break; 2768 2769 case IP_VS_SO_GET_SERVICES: 2770 { 2771 struct ip_vs_get_services *get; 2772 int size; 2773 2774 get = (struct ip_vs_get_services *)arg; 2775 size = sizeof(*get) + 2776 sizeof(struct ip_vs_service_entry) * get->num_services; 2777 if (*len != size) { 2778 pr_err("length: %u != %u\n", *len, size); 2779 ret = -EINVAL; 2780 goto out; 2781 } 2782 ret = __ip_vs_get_service_entries(ipvs, get, user); 2783 } 2784 break; 2785 2786 case IP_VS_SO_GET_SERVICE: 2787 { 2788 struct ip_vs_service_entry *entry; 2789 struct ip_vs_service *svc; 2790 union nf_inet_addr addr; 2791 2792 entry = (struct ip_vs_service_entry *)arg; 2793 addr.ip = entry->addr; 2794 rcu_read_lock(); 2795 if (entry->fwmark) 2796 svc = __ip_vs_svc_fwm_find(ipvs, AF_INET, entry->fwmark); 2797 else 2798 svc = __ip_vs_service_find(ipvs, AF_INET, 2799 entry->protocol, &addr, 2800 entry->port); 2801 rcu_read_unlock(); 2802 if (svc) { 2803 ip_vs_copy_service(entry, svc); 2804 if (copy_to_user(user, entry, sizeof(*entry)) != 0) 2805 ret = -EFAULT; 2806 } else 2807 ret = -ESRCH; 2808 } 2809 break; 2810 2811 case IP_VS_SO_GET_DESTS: 2812 { 2813 struct ip_vs_get_dests *get; 2814 int size; 2815 2816 get = (struct ip_vs_get_dests *)arg; 2817 size = sizeof(*get) + 2818 sizeof(struct ip_vs_dest_entry) * get->num_dests; 2819 if (*len != size) { 2820 pr_err("length: %u != %u\n", *len, size); 2821 ret = -EINVAL; 2822 goto out; 2823 } 2824 ret = __ip_vs_get_dest_entries(ipvs, get, user); 2825 } 2826 break; 2827 2828 case IP_VS_SO_GET_TIMEOUT: 2829 { 2830 struct ip_vs_timeout_user t; 2831 2832 __ip_vs_get_timeouts(ipvs, &t); 2833 if (copy_to_user(user, &t, sizeof(t)) != 0) 2834 ret = -EFAULT; 2835 } 2836 break; 2837 2838 default: 2839 ret = -EINVAL; 2840 } 2841 2842 out: 2843 mutex_unlock(&__ip_vs_mutex); 2844 return ret; 2845 } 2846 2847 2848 static struct nf_sockopt_ops ip_vs_sockopts = { 2849 .pf = PF_INET, 2850 .set_optmin = IP_VS_BASE_CTL, 2851 .set_optmax = IP_VS_SO_SET_MAX+1, 2852 .set = do_ip_vs_set_ctl, 2853 .get_optmin = IP_VS_BASE_CTL, 2854 .get_optmax = IP_VS_SO_GET_MAX+1, 2855 .get = do_ip_vs_get_ctl, 2856 .owner = THIS_MODULE, 2857 }; 2858 2859 /* 2860 * Generic Netlink interface 2861 */ 2862 2863 /* IPVS genetlink family */ 2864 static struct genl_family ip_vs_genl_family = { 2865 .id = GENL_ID_GENERATE, 2866 .hdrsize = 0, 2867 .name = IPVS_GENL_NAME, 2868 .version = IPVS_GENL_VERSION, 2869 .maxattr = IPVS_CMD_ATTR_MAX, 2870 .netnsok = true, /* Make ipvsadm to work on netns */ 2871 }; 2872 2873 /* Policy used for first-level command attributes */ 2874 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { 2875 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, 2876 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, 2877 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, 2878 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, 2879 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, 2880 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, 2881 }; 2882 2883 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ 2884 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { 2885 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, 2886 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, 2887 .len = IP_VS_IFNAME_MAXLEN - 1 }, 2888 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, 2889 [IPVS_DAEMON_ATTR_SYNC_MAXLEN] = { .type = NLA_U16 }, 2890 [IPVS_DAEMON_ATTR_MCAST_GROUP] = { .type = NLA_U32 }, 2891 [IPVS_DAEMON_ATTR_MCAST_GROUP6] = { .len = sizeof(struct in6_addr) }, 2892 [IPVS_DAEMON_ATTR_MCAST_PORT] = { .type = NLA_U16 }, 2893 [IPVS_DAEMON_ATTR_MCAST_TTL] = { .type = NLA_U8 }, 2894 }; 2895 2896 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ 2897 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { 2898 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, 2899 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, 2900 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, 2901 .len = sizeof(union nf_inet_addr) }, 2902 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, 2903 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, 2904 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, 2905 .len = IP_VS_SCHEDNAME_MAXLEN - 1 }, 2906 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, 2907 .len = IP_VS_PENAME_MAXLEN }, 2908 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, 2909 .len = sizeof(struct ip_vs_flags) }, 2910 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, 2911 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, 2912 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, 2913 }; 2914 2915 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ 2916 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { 2917 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, 2918 .len = sizeof(union nf_inet_addr) }, 2919 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, 2920 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, 2921 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, 2922 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, 2923 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, 2924 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, 2925 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, 2926 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, 2927 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, 2928 [IPVS_DEST_ATTR_ADDR_FAMILY] = { .type = NLA_U16 }, 2929 }; 2930 2931 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, 2932 struct ip_vs_kstats *kstats) 2933 { 2934 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2935 2936 if (!nl_stats) 2937 return -EMSGSIZE; 2938 2939 if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) || 2940 nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) || 2941 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) || 2942 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 2943 IPVS_STATS_ATTR_PAD) || 2944 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 2945 IPVS_STATS_ATTR_PAD) || 2946 nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) || 2947 nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) || 2948 nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) || 2949 nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) || 2950 nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps)) 2951 goto nla_put_failure; 2952 nla_nest_end(skb, nl_stats); 2953 2954 return 0; 2955 2956 nla_put_failure: 2957 nla_nest_cancel(skb, nl_stats); 2958 return -EMSGSIZE; 2959 } 2960 2961 static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type, 2962 struct ip_vs_kstats *kstats) 2963 { 2964 struct nlattr *nl_stats = nla_nest_start(skb, container_type); 2965 2966 if (!nl_stats) 2967 return -EMSGSIZE; 2968 2969 if (nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CONNS, kstats->conns, 2970 IPVS_STATS_ATTR_PAD) || 2971 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts, 2972 IPVS_STATS_ATTR_PAD) || 2973 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts, 2974 IPVS_STATS_ATTR_PAD) || 2975 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes, 2976 IPVS_STATS_ATTR_PAD) || 2977 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes, 2978 IPVS_STATS_ATTR_PAD) || 2979 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_CPS, kstats->cps, 2980 IPVS_STATS_ATTR_PAD) || 2981 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps, 2982 IPVS_STATS_ATTR_PAD) || 2983 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps, 2984 IPVS_STATS_ATTR_PAD) || 2985 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps, 2986 IPVS_STATS_ATTR_PAD) || 2987 nla_put_u64_64bit(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps, 2988 IPVS_STATS_ATTR_PAD)) 2989 goto nla_put_failure; 2990 nla_nest_end(skb, nl_stats); 2991 2992 return 0; 2993 2994 nla_put_failure: 2995 nla_nest_cancel(skb, nl_stats); 2996 return -EMSGSIZE; 2997 } 2998 2999 static int ip_vs_genl_fill_service(struct sk_buff *skb, 3000 struct ip_vs_service *svc) 3001 { 3002 struct ip_vs_scheduler *sched; 3003 struct ip_vs_pe *pe; 3004 struct nlattr *nl_service; 3005 struct ip_vs_flags flags = { .flags = svc->flags, 3006 .mask = ~0 }; 3007 struct ip_vs_kstats kstats; 3008 char *sched_name; 3009 3010 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); 3011 if (!nl_service) 3012 return -EMSGSIZE; 3013 3014 if (nla_put_u16(skb, IPVS_SVC_ATTR_AF, svc->af)) 3015 goto nla_put_failure; 3016 if (svc->fwmark) { 3017 if (nla_put_u32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark)) 3018 goto nla_put_failure; 3019 } else { 3020 if (nla_put_u16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol) || 3021 nla_put(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr) || 3022 nla_put_be16(skb, IPVS_SVC_ATTR_PORT, svc->port)) 3023 goto nla_put_failure; 3024 } 3025 3026 sched = rcu_dereference_protected(svc->scheduler, 1); 3027 sched_name = sched ? sched->name : "none"; 3028 pe = rcu_dereference_protected(svc->pe, 1); 3029 if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || 3030 (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || 3031 nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || 3032 nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) || 3033 nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask)) 3034 goto nla_put_failure; 3035 ip_vs_copy_stats(&kstats, &svc->stats); 3036 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats)) 3037 goto nla_put_failure; 3038 if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats)) 3039 goto nla_put_failure; 3040 3041 nla_nest_end(skb, nl_service); 3042 3043 return 0; 3044 3045 nla_put_failure: 3046 nla_nest_cancel(skb, nl_service); 3047 return -EMSGSIZE; 3048 } 3049 3050 static int ip_vs_genl_dump_service(struct sk_buff *skb, 3051 struct ip_vs_service *svc, 3052 struct netlink_callback *cb) 3053 { 3054 void *hdr; 3055 3056 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3057 &ip_vs_genl_family, NLM_F_MULTI, 3058 IPVS_CMD_NEW_SERVICE); 3059 if (!hdr) 3060 return -EMSGSIZE; 3061 3062 if (ip_vs_genl_fill_service(skb, svc) < 0) 3063 goto nla_put_failure; 3064 3065 genlmsg_end(skb, hdr); 3066 return 0; 3067 3068 nla_put_failure: 3069 genlmsg_cancel(skb, hdr); 3070 return -EMSGSIZE; 3071 } 3072 3073 static int ip_vs_genl_dump_services(struct sk_buff *skb, 3074 struct netlink_callback *cb) 3075 { 3076 int idx = 0, i; 3077 int start = cb->args[0]; 3078 struct ip_vs_service *svc; 3079 struct net *net = sock_net(skb->sk); 3080 struct netns_ipvs *ipvs = net_ipvs(net); 3081 3082 mutex_lock(&__ip_vs_mutex); 3083 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3084 hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { 3085 if (++idx <= start || (svc->ipvs != ipvs)) 3086 continue; 3087 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3088 idx--; 3089 goto nla_put_failure; 3090 } 3091 } 3092 } 3093 3094 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { 3095 hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { 3096 if (++idx <= start || (svc->ipvs != ipvs)) 3097 continue; 3098 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { 3099 idx--; 3100 goto nla_put_failure; 3101 } 3102 } 3103 } 3104 3105 nla_put_failure: 3106 mutex_unlock(&__ip_vs_mutex); 3107 cb->args[0] = idx; 3108 3109 return skb->len; 3110 } 3111 3112 static bool ip_vs_is_af_valid(int af) 3113 { 3114 if (af == AF_INET) 3115 return true; 3116 #ifdef CONFIG_IP_VS_IPV6 3117 if (af == AF_INET6 && ipv6_mod_enabled()) 3118 return true; 3119 #endif 3120 return false; 3121 } 3122 3123 static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, 3124 struct ip_vs_service_user_kern *usvc, 3125 struct nlattr *nla, int full_entry, 3126 struct ip_vs_service **ret_svc) 3127 { 3128 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; 3129 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; 3130 struct ip_vs_service *svc; 3131 3132 /* Parse mandatory identifying service fields first */ 3133 if (nla == NULL || 3134 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) 3135 return -EINVAL; 3136 3137 nla_af = attrs[IPVS_SVC_ATTR_AF]; 3138 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; 3139 nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; 3140 nla_port = attrs[IPVS_SVC_ATTR_PORT]; 3141 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; 3142 3143 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) 3144 return -EINVAL; 3145 3146 memset(usvc, 0, sizeof(*usvc)); 3147 3148 usvc->af = nla_get_u16(nla_af); 3149 if (!ip_vs_is_af_valid(usvc->af)) 3150 return -EAFNOSUPPORT; 3151 3152 if (nla_fwmark) { 3153 usvc->protocol = IPPROTO_TCP; 3154 usvc->fwmark = nla_get_u32(nla_fwmark); 3155 } else { 3156 usvc->protocol = nla_get_u16(nla_protocol); 3157 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); 3158 usvc->port = nla_get_be16(nla_port); 3159 usvc->fwmark = 0; 3160 } 3161 3162 rcu_read_lock(); 3163 if (usvc->fwmark) 3164 svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); 3165 else 3166 svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, 3167 &usvc->addr, usvc->port); 3168 rcu_read_unlock(); 3169 *ret_svc = svc; 3170 3171 /* If a full entry was requested, check for the additional fields */ 3172 if (full_entry) { 3173 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, 3174 *nla_netmask; 3175 struct ip_vs_flags flags; 3176 3177 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; 3178 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; 3179 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; 3180 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; 3181 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; 3182 3183 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) 3184 return -EINVAL; 3185 3186 nla_memcpy(&flags, nla_flags, sizeof(flags)); 3187 3188 /* prefill flags from service if it already exists */ 3189 if (svc) 3190 usvc->flags = svc->flags; 3191 3192 /* set new flags from userland */ 3193 usvc->flags = (usvc->flags & ~flags.mask) | 3194 (flags.flags & flags.mask); 3195 usvc->sched_name = nla_data(nla_sched); 3196 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; 3197 usvc->timeout = nla_get_u32(nla_timeout); 3198 usvc->netmask = nla_get_be32(nla_netmask); 3199 } 3200 3201 return 0; 3202 } 3203 3204 static struct ip_vs_service *ip_vs_genl_find_service(struct netns_ipvs *ipvs, 3205 struct nlattr *nla) 3206 { 3207 struct ip_vs_service_user_kern usvc; 3208 struct ip_vs_service *svc; 3209 int ret; 3210 3211 ret = ip_vs_genl_parse_service(ipvs, &usvc, nla, 0, &svc); 3212 return ret ? ERR_PTR(ret) : svc; 3213 } 3214 3215 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) 3216 { 3217 struct nlattr *nl_dest; 3218 struct ip_vs_kstats kstats; 3219 3220 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); 3221 if (!nl_dest) 3222 return -EMSGSIZE; 3223 3224 if (nla_put(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr) || 3225 nla_put_be16(skb, IPVS_DEST_ATTR_PORT, dest->port) || 3226 nla_put_u32(skb, IPVS_DEST_ATTR_FWD_METHOD, 3227 (atomic_read(&dest->conn_flags) & 3228 IP_VS_CONN_F_FWD_MASK)) || 3229 nla_put_u32(skb, IPVS_DEST_ATTR_WEIGHT, 3230 atomic_read(&dest->weight)) || 3231 nla_put_u32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold) || 3232 nla_put_u32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold) || 3233 nla_put_u32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, 3234 atomic_read(&dest->activeconns)) || 3235 nla_put_u32(skb, IPVS_DEST_ATTR_INACT_CONNS, 3236 atomic_read(&dest->inactconns)) || 3237 nla_put_u32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, 3238 atomic_read(&dest->persistconns)) || 3239 nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af)) 3240 goto nla_put_failure; 3241 ip_vs_copy_stats(&kstats, &dest->stats); 3242 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats)) 3243 goto nla_put_failure; 3244 if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats)) 3245 goto nla_put_failure; 3246 3247 nla_nest_end(skb, nl_dest); 3248 3249 return 0; 3250 3251 nla_put_failure: 3252 nla_nest_cancel(skb, nl_dest); 3253 return -EMSGSIZE; 3254 } 3255 3256 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, 3257 struct netlink_callback *cb) 3258 { 3259 void *hdr; 3260 3261 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3262 &ip_vs_genl_family, NLM_F_MULTI, 3263 IPVS_CMD_NEW_DEST); 3264 if (!hdr) 3265 return -EMSGSIZE; 3266 3267 if (ip_vs_genl_fill_dest(skb, dest) < 0) 3268 goto nla_put_failure; 3269 3270 genlmsg_end(skb, hdr); 3271 return 0; 3272 3273 nla_put_failure: 3274 genlmsg_cancel(skb, hdr); 3275 return -EMSGSIZE; 3276 } 3277 3278 static int ip_vs_genl_dump_dests(struct sk_buff *skb, 3279 struct netlink_callback *cb) 3280 { 3281 int idx = 0; 3282 int start = cb->args[0]; 3283 struct ip_vs_service *svc; 3284 struct ip_vs_dest *dest; 3285 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; 3286 struct net *net = sock_net(skb->sk); 3287 struct netns_ipvs *ipvs = net_ipvs(net); 3288 3289 mutex_lock(&__ip_vs_mutex); 3290 3291 /* Try to find the service for which to dump destinations */ 3292 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, 3293 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) 3294 goto out_err; 3295 3296 3297 svc = ip_vs_genl_find_service(ipvs, attrs[IPVS_CMD_ATTR_SERVICE]); 3298 if (IS_ERR(svc) || svc == NULL) 3299 goto out_err; 3300 3301 /* Dump the destinations */ 3302 list_for_each_entry(dest, &svc->destinations, n_list) { 3303 if (++idx <= start) 3304 continue; 3305 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { 3306 idx--; 3307 goto nla_put_failure; 3308 } 3309 } 3310 3311 nla_put_failure: 3312 cb->args[0] = idx; 3313 3314 out_err: 3315 mutex_unlock(&__ip_vs_mutex); 3316 3317 return skb->len; 3318 } 3319 3320 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, 3321 struct nlattr *nla, int full_entry) 3322 { 3323 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; 3324 struct nlattr *nla_addr, *nla_port; 3325 struct nlattr *nla_addr_family; 3326 3327 /* Parse mandatory identifying destination fields first */ 3328 if (nla == NULL || 3329 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) 3330 return -EINVAL; 3331 3332 nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; 3333 nla_port = attrs[IPVS_DEST_ATTR_PORT]; 3334 nla_addr_family = attrs[IPVS_DEST_ATTR_ADDR_FAMILY]; 3335 3336 if (!(nla_addr && nla_port)) 3337 return -EINVAL; 3338 3339 memset(udest, 0, sizeof(*udest)); 3340 3341 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); 3342 udest->port = nla_get_be16(nla_port); 3343 3344 if (nla_addr_family) 3345 udest->af = nla_get_u16(nla_addr_family); 3346 else 3347 udest->af = 0; 3348 3349 /* If a full entry was requested, check for the additional fields */ 3350 if (full_entry) { 3351 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, 3352 *nla_l_thresh; 3353 3354 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; 3355 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; 3356 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; 3357 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; 3358 3359 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) 3360 return -EINVAL; 3361 3362 udest->conn_flags = nla_get_u32(nla_fwd) 3363 & IP_VS_CONN_F_FWD_MASK; 3364 udest->weight = nla_get_u32(nla_weight); 3365 udest->u_threshold = nla_get_u32(nla_u_thresh); 3366 udest->l_threshold = nla_get_u32(nla_l_thresh); 3367 } 3368 3369 return 0; 3370 } 3371 3372 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __u32 state, 3373 struct ipvs_sync_daemon_cfg *c) 3374 { 3375 struct nlattr *nl_daemon; 3376 3377 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); 3378 if (!nl_daemon) 3379 return -EMSGSIZE; 3380 3381 if (nla_put_u32(skb, IPVS_DAEMON_ATTR_STATE, state) || 3382 nla_put_string(skb, IPVS_DAEMON_ATTR_MCAST_IFN, c->mcast_ifn) || 3383 nla_put_u32(skb, IPVS_DAEMON_ATTR_SYNC_ID, c->syncid) || 3384 nla_put_u16(skb, IPVS_DAEMON_ATTR_SYNC_MAXLEN, c->sync_maxlen) || 3385 nla_put_u16(skb, IPVS_DAEMON_ATTR_MCAST_PORT, c->mcast_port) || 3386 nla_put_u8(skb, IPVS_DAEMON_ATTR_MCAST_TTL, c->mcast_ttl)) 3387 goto nla_put_failure; 3388 #ifdef CONFIG_IP_VS_IPV6 3389 if (c->mcast_af == AF_INET6) { 3390 if (nla_put_in6_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP6, 3391 &c->mcast_group.in6)) 3392 goto nla_put_failure; 3393 } else 3394 #endif 3395 if (c->mcast_af == AF_INET && 3396 nla_put_in_addr(skb, IPVS_DAEMON_ATTR_MCAST_GROUP, 3397 c->mcast_group.ip)) 3398 goto nla_put_failure; 3399 nla_nest_end(skb, nl_daemon); 3400 3401 return 0; 3402 3403 nla_put_failure: 3404 nla_nest_cancel(skb, nl_daemon); 3405 return -EMSGSIZE; 3406 } 3407 3408 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, 3409 struct ipvs_sync_daemon_cfg *c, 3410 struct netlink_callback *cb) 3411 { 3412 void *hdr; 3413 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 3414 &ip_vs_genl_family, NLM_F_MULTI, 3415 IPVS_CMD_NEW_DAEMON); 3416 if (!hdr) 3417 return -EMSGSIZE; 3418 3419 if (ip_vs_genl_fill_daemon(skb, state, c)) 3420 goto nla_put_failure; 3421 3422 genlmsg_end(skb, hdr); 3423 return 0; 3424 3425 nla_put_failure: 3426 genlmsg_cancel(skb, hdr); 3427 return -EMSGSIZE; 3428 } 3429 3430 static int ip_vs_genl_dump_daemons(struct sk_buff *skb, 3431 struct netlink_callback *cb) 3432 { 3433 struct net *net = sock_net(skb->sk); 3434 struct netns_ipvs *ipvs = net_ipvs(net); 3435 3436 mutex_lock(&ipvs->sync_mutex); 3437 if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { 3438 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, 3439 &ipvs->mcfg, cb) < 0) 3440 goto nla_put_failure; 3441 3442 cb->args[0] = 1; 3443 } 3444 3445 if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { 3446 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, 3447 &ipvs->bcfg, cb) < 0) 3448 goto nla_put_failure; 3449 3450 cb->args[1] = 1; 3451 } 3452 3453 nla_put_failure: 3454 mutex_unlock(&ipvs->sync_mutex); 3455 3456 return skb->len; 3457 } 3458 3459 static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3460 { 3461 struct ipvs_sync_daemon_cfg c; 3462 struct nlattr *a; 3463 int ret; 3464 3465 memset(&c, 0, sizeof(c)); 3466 if (!(attrs[IPVS_DAEMON_ATTR_STATE] && 3467 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && 3468 attrs[IPVS_DAEMON_ATTR_SYNC_ID])) 3469 return -EINVAL; 3470 strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), 3471 sizeof(c.mcast_ifn)); 3472 c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); 3473 3474 a = attrs[IPVS_DAEMON_ATTR_SYNC_MAXLEN]; 3475 if (a) 3476 c.sync_maxlen = nla_get_u16(a); 3477 3478 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP]; 3479 if (a) { 3480 c.mcast_af = AF_INET; 3481 c.mcast_group.ip = nla_get_in_addr(a); 3482 if (!ipv4_is_multicast(c.mcast_group.ip)) 3483 return -EINVAL; 3484 } else { 3485 a = attrs[IPVS_DAEMON_ATTR_MCAST_GROUP6]; 3486 if (a) { 3487 #ifdef CONFIG_IP_VS_IPV6 3488 int addr_type; 3489 3490 c.mcast_af = AF_INET6; 3491 c.mcast_group.in6 = nla_get_in6_addr(a); 3492 addr_type = ipv6_addr_type(&c.mcast_group.in6); 3493 if (!(addr_type & IPV6_ADDR_MULTICAST)) 3494 return -EINVAL; 3495 #else 3496 return -EAFNOSUPPORT; 3497 #endif 3498 } 3499 } 3500 3501 a = attrs[IPVS_DAEMON_ATTR_MCAST_PORT]; 3502 if (a) 3503 c.mcast_port = nla_get_u16(a); 3504 3505 a = attrs[IPVS_DAEMON_ATTR_MCAST_TTL]; 3506 if (a) 3507 c.mcast_ttl = nla_get_u8(a); 3508 3509 /* The synchronization protocol is incompatible with mixed family 3510 * services 3511 */ 3512 if (ipvs->mixed_address_family_dests > 0) 3513 return -EINVAL; 3514 3515 ret = start_sync_thread(ipvs, &c, 3516 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3517 return ret; 3518 } 3519 3520 static int ip_vs_genl_del_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) 3521 { 3522 int ret; 3523 3524 if (!attrs[IPVS_DAEMON_ATTR_STATE]) 3525 return -EINVAL; 3526 3527 mutex_lock(&ipvs->sync_mutex); 3528 ret = stop_sync_thread(ipvs, 3529 nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); 3530 mutex_unlock(&ipvs->sync_mutex); 3531 return ret; 3532 } 3533 3534 static int ip_vs_genl_set_config(struct netns_ipvs *ipvs, struct nlattr **attrs) 3535 { 3536 struct ip_vs_timeout_user t; 3537 3538 __ip_vs_get_timeouts(ipvs, &t); 3539 3540 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) 3541 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); 3542 3543 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) 3544 t.tcp_fin_timeout = 3545 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); 3546 3547 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) 3548 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); 3549 3550 return ip_vs_set_timeout(ipvs, &t); 3551 } 3552 3553 static int ip_vs_genl_set_daemon(struct sk_buff *skb, struct genl_info *info) 3554 { 3555 int ret = -EINVAL, cmd; 3556 struct net *net = sock_net(skb->sk); 3557 struct netns_ipvs *ipvs = net_ipvs(net); 3558 3559 cmd = info->genlhdr->cmd; 3560 3561 if (cmd == IPVS_CMD_NEW_DAEMON || cmd == IPVS_CMD_DEL_DAEMON) { 3562 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; 3563 3564 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || 3565 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, 3566 info->attrs[IPVS_CMD_ATTR_DAEMON], 3567 ip_vs_daemon_policy)) 3568 goto out; 3569 3570 if (cmd == IPVS_CMD_NEW_DAEMON) 3571 ret = ip_vs_genl_new_daemon(ipvs, daemon_attrs); 3572 else 3573 ret = ip_vs_genl_del_daemon(ipvs, daemon_attrs); 3574 } 3575 3576 out: 3577 return ret; 3578 } 3579 3580 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) 3581 { 3582 struct ip_vs_service *svc = NULL; 3583 struct ip_vs_service_user_kern usvc; 3584 struct ip_vs_dest_user_kern udest; 3585 int ret = 0, cmd; 3586 int need_full_svc = 0, need_full_dest = 0; 3587 struct net *net = sock_net(skb->sk); 3588 struct netns_ipvs *ipvs = net_ipvs(net); 3589 3590 cmd = info->genlhdr->cmd; 3591 3592 mutex_lock(&__ip_vs_mutex); 3593 3594 if (cmd == IPVS_CMD_FLUSH) { 3595 ret = ip_vs_flush(ipvs, false); 3596 goto out; 3597 } else if (cmd == IPVS_CMD_SET_CONFIG) { 3598 ret = ip_vs_genl_set_config(ipvs, info->attrs); 3599 goto out; 3600 } else if (cmd == IPVS_CMD_ZERO && 3601 !info->attrs[IPVS_CMD_ATTR_SERVICE]) { 3602 ret = ip_vs_zero_all(ipvs); 3603 goto out; 3604 } 3605 3606 /* All following commands require a service argument, so check if we 3607 * received a valid one. We need a full service specification when 3608 * adding / editing a service. Only identifying members otherwise. */ 3609 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) 3610 need_full_svc = 1; 3611 3612 ret = ip_vs_genl_parse_service(ipvs, &usvc, 3613 info->attrs[IPVS_CMD_ATTR_SERVICE], 3614 need_full_svc, &svc); 3615 if (ret) 3616 goto out; 3617 3618 /* Unless we're adding a new service, the service must already exist */ 3619 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { 3620 ret = -ESRCH; 3621 goto out; 3622 } 3623 3624 /* Destination commands require a valid destination argument. For 3625 * adding / editing a destination, we need a full destination 3626 * specification. */ 3627 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || 3628 cmd == IPVS_CMD_DEL_DEST) { 3629 if (cmd != IPVS_CMD_DEL_DEST) 3630 need_full_dest = 1; 3631 3632 ret = ip_vs_genl_parse_dest(&udest, 3633 info->attrs[IPVS_CMD_ATTR_DEST], 3634 need_full_dest); 3635 if (ret) 3636 goto out; 3637 3638 /* Old protocols did not allow the user to specify address 3639 * family, so we set it to zero instead. We also didn't 3640 * allow heterogeneous pools in the old code, so it's safe 3641 * to assume that this will have the same address family as 3642 * the service. 3643 */ 3644 if (udest.af == 0) 3645 udest.af = svc->af; 3646 3647 if (!ip_vs_is_af_valid(udest.af)) { 3648 ret = -EAFNOSUPPORT; 3649 goto out; 3650 } 3651 3652 if (udest.af != svc->af && cmd != IPVS_CMD_DEL_DEST) { 3653 /* The synchronization protocol is incompatible 3654 * with mixed family services 3655 */ 3656 if (ipvs->sync_state) { 3657 ret = -EINVAL; 3658 goto out; 3659 } 3660 3661 /* Which connection types do we support? */ 3662 switch (udest.conn_flags) { 3663 case IP_VS_CONN_F_TUNNEL: 3664 /* We are able to forward this */ 3665 break; 3666 default: 3667 ret = -EINVAL; 3668 goto out; 3669 } 3670 } 3671 } 3672 3673 switch (cmd) { 3674 case IPVS_CMD_NEW_SERVICE: 3675 if (svc == NULL) 3676 ret = ip_vs_add_service(ipvs, &usvc, &svc); 3677 else 3678 ret = -EEXIST; 3679 break; 3680 case IPVS_CMD_SET_SERVICE: 3681 ret = ip_vs_edit_service(svc, &usvc); 3682 break; 3683 case IPVS_CMD_DEL_SERVICE: 3684 ret = ip_vs_del_service(svc); 3685 /* do not use svc, it can be freed */ 3686 break; 3687 case IPVS_CMD_NEW_DEST: 3688 ret = ip_vs_add_dest(svc, &udest); 3689 break; 3690 case IPVS_CMD_SET_DEST: 3691 ret = ip_vs_edit_dest(svc, &udest); 3692 break; 3693 case IPVS_CMD_DEL_DEST: 3694 ret = ip_vs_del_dest(svc, &udest); 3695 break; 3696 case IPVS_CMD_ZERO: 3697 ret = ip_vs_zero_service(svc); 3698 break; 3699 default: 3700 ret = -EINVAL; 3701 } 3702 3703 out: 3704 mutex_unlock(&__ip_vs_mutex); 3705 3706 return ret; 3707 } 3708 3709 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) 3710 { 3711 struct sk_buff *msg; 3712 void *reply; 3713 int ret, cmd, reply_cmd; 3714 struct net *net = sock_net(skb->sk); 3715 struct netns_ipvs *ipvs = net_ipvs(net); 3716 3717 cmd = info->genlhdr->cmd; 3718 3719 if (cmd == IPVS_CMD_GET_SERVICE) 3720 reply_cmd = IPVS_CMD_NEW_SERVICE; 3721 else if (cmd == IPVS_CMD_GET_INFO) 3722 reply_cmd = IPVS_CMD_SET_INFO; 3723 else if (cmd == IPVS_CMD_GET_CONFIG) 3724 reply_cmd = IPVS_CMD_SET_CONFIG; 3725 else { 3726 pr_err("unknown Generic Netlink command\n"); 3727 return -EINVAL; 3728 } 3729 3730 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); 3731 if (!msg) 3732 return -ENOMEM; 3733 3734 mutex_lock(&__ip_vs_mutex); 3735 3736 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); 3737 if (reply == NULL) 3738 goto nla_put_failure; 3739 3740 switch (cmd) { 3741 case IPVS_CMD_GET_SERVICE: 3742 { 3743 struct ip_vs_service *svc; 3744 3745 svc = ip_vs_genl_find_service(ipvs, 3746 info->attrs[IPVS_CMD_ATTR_SERVICE]); 3747 if (IS_ERR(svc)) { 3748 ret = PTR_ERR(svc); 3749 goto out_err; 3750 } else if (svc) { 3751 ret = ip_vs_genl_fill_service(msg, svc); 3752 if (ret) 3753 goto nla_put_failure; 3754 } else { 3755 ret = -ESRCH; 3756 goto out_err; 3757 } 3758 3759 break; 3760 } 3761 3762 case IPVS_CMD_GET_CONFIG: 3763 { 3764 struct ip_vs_timeout_user t; 3765 3766 __ip_vs_get_timeouts(ipvs, &t); 3767 #ifdef CONFIG_IP_VS_PROTO_TCP 3768 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, 3769 t.tcp_timeout) || 3770 nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, 3771 t.tcp_fin_timeout)) 3772 goto nla_put_failure; 3773 #endif 3774 #ifdef CONFIG_IP_VS_PROTO_UDP 3775 if (nla_put_u32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout)) 3776 goto nla_put_failure; 3777 #endif 3778 3779 break; 3780 } 3781 3782 case IPVS_CMD_GET_INFO: 3783 if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, 3784 IP_VS_VERSION_CODE) || 3785 nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, 3786 ip_vs_conn_tab_size)) 3787 goto nla_put_failure; 3788 break; 3789 } 3790 3791 genlmsg_end(msg, reply); 3792 ret = genlmsg_reply(msg, info); 3793 goto out; 3794 3795 nla_put_failure: 3796 pr_err("not enough space in Netlink message\n"); 3797 ret = -EMSGSIZE; 3798 3799 out_err: 3800 nlmsg_free(msg); 3801 out: 3802 mutex_unlock(&__ip_vs_mutex); 3803 3804 return ret; 3805 } 3806 3807 3808 static const struct genl_ops ip_vs_genl_ops[] = { 3809 { 3810 .cmd = IPVS_CMD_NEW_SERVICE, 3811 .flags = GENL_ADMIN_PERM, 3812 .policy = ip_vs_cmd_policy, 3813 .doit = ip_vs_genl_set_cmd, 3814 }, 3815 { 3816 .cmd = IPVS_CMD_SET_SERVICE, 3817 .flags = GENL_ADMIN_PERM, 3818 .policy = ip_vs_cmd_policy, 3819 .doit = ip_vs_genl_set_cmd, 3820 }, 3821 { 3822 .cmd = IPVS_CMD_DEL_SERVICE, 3823 .flags = GENL_ADMIN_PERM, 3824 .policy = ip_vs_cmd_policy, 3825 .doit = ip_vs_genl_set_cmd, 3826 }, 3827 { 3828 .cmd = IPVS_CMD_GET_SERVICE, 3829 .flags = GENL_ADMIN_PERM, 3830 .doit = ip_vs_genl_get_cmd, 3831 .dumpit = ip_vs_genl_dump_services, 3832 .policy = ip_vs_cmd_policy, 3833 }, 3834 { 3835 .cmd = IPVS_CMD_NEW_DEST, 3836 .flags = GENL_ADMIN_PERM, 3837 .policy = ip_vs_cmd_policy, 3838 .doit = ip_vs_genl_set_cmd, 3839 }, 3840 { 3841 .cmd = IPVS_CMD_SET_DEST, 3842 .flags = GENL_ADMIN_PERM, 3843 .policy = ip_vs_cmd_policy, 3844 .doit = ip_vs_genl_set_cmd, 3845 }, 3846 { 3847 .cmd = IPVS_CMD_DEL_DEST, 3848 .flags = GENL_ADMIN_PERM, 3849 .policy = ip_vs_cmd_policy, 3850 .doit = ip_vs_genl_set_cmd, 3851 }, 3852 { 3853 .cmd = IPVS_CMD_GET_DEST, 3854 .flags = GENL_ADMIN_PERM, 3855 .policy = ip_vs_cmd_policy, 3856 .dumpit = ip_vs_genl_dump_dests, 3857 }, 3858 { 3859 .cmd = IPVS_CMD_NEW_DAEMON, 3860 .flags = GENL_ADMIN_PERM, 3861 .policy = ip_vs_cmd_policy, 3862 .doit = ip_vs_genl_set_daemon, 3863 }, 3864 { 3865 .cmd = IPVS_CMD_DEL_DAEMON, 3866 .flags = GENL_ADMIN_PERM, 3867 .policy = ip_vs_cmd_policy, 3868 .doit = ip_vs_genl_set_daemon, 3869 }, 3870 { 3871 .cmd = IPVS_CMD_GET_DAEMON, 3872 .flags = GENL_ADMIN_PERM, 3873 .dumpit = ip_vs_genl_dump_daemons, 3874 }, 3875 { 3876 .cmd = IPVS_CMD_SET_CONFIG, 3877 .flags = GENL_ADMIN_PERM, 3878 .policy = ip_vs_cmd_policy, 3879 .doit = ip_vs_genl_set_cmd, 3880 }, 3881 { 3882 .cmd = IPVS_CMD_GET_CONFIG, 3883 .flags = GENL_ADMIN_PERM, 3884 .doit = ip_vs_genl_get_cmd, 3885 }, 3886 { 3887 .cmd = IPVS_CMD_GET_INFO, 3888 .flags = GENL_ADMIN_PERM, 3889 .doit = ip_vs_genl_get_cmd, 3890 }, 3891 { 3892 .cmd = IPVS_CMD_ZERO, 3893 .flags = GENL_ADMIN_PERM, 3894 .policy = ip_vs_cmd_policy, 3895 .doit = ip_vs_genl_set_cmd, 3896 }, 3897 { 3898 .cmd = IPVS_CMD_FLUSH, 3899 .flags = GENL_ADMIN_PERM, 3900 .doit = ip_vs_genl_set_cmd, 3901 }, 3902 }; 3903 3904 static int __init ip_vs_genl_register(void) 3905 { 3906 return genl_register_family_with_ops(&ip_vs_genl_family, 3907 ip_vs_genl_ops); 3908 } 3909 3910 static void ip_vs_genl_unregister(void) 3911 { 3912 genl_unregister_family(&ip_vs_genl_family); 3913 } 3914 3915 /* End of Generic Netlink interface definitions */ 3916 3917 /* 3918 * per netns intit/exit func. 3919 */ 3920 #ifdef CONFIG_SYSCTL 3921 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) 3922 { 3923 struct net *net = ipvs->net; 3924 int idx; 3925 struct ctl_table *tbl; 3926 3927 atomic_set(&ipvs->dropentry, 0); 3928 spin_lock_init(&ipvs->dropentry_lock); 3929 spin_lock_init(&ipvs->droppacket_lock); 3930 spin_lock_init(&ipvs->securetcp_lock); 3931 3932 if (!net_eq(net, &init_net)) { 3933 tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL); 3934 if (tbl == NULL) 3935 return -ENOMEM; 3936 3937 /* Don't export sysctls to unprivileged users */ 3938 if (net->user_ns != &init_user_ns) 3939 tbl[0].procname = NULL; 3940 } else 3941 tbl = vs_vars; 3942 /* Initialize sysctl defaults */ 3943 for (idx = 0; idx < ARRAY_SIZE(vs_vars); idx++) { 3944 if (tbl[idx].proc_handler == proc_do_defense_mode) 3945 tbl[idx].extra2 = ipvs; 3946 } 3947 idx = 0; 3948 ipvs->sysctl_amemthresh = 1024; 3949 tbl[idx++].data = &ipvs->sysctl_amemthresh; 3950 ipvs->sysctl_am_droprate = 10; 3951 tbl[idx++].data = &ipvs->sysctl_am_droprate; 3952 tbl[idx++].data = &ipvs->sysctl_drop_entry; 3953 tbl[idx++].data = &ipvs->sysctl_drop_packet; 3954 #ifdef CONFIG_IP_VS_NFCT 3955 tbl[idx++].data = &ipvs->sysctl_conntrack; 3956 #endif 3957 tbl[idx++].data = &ipvs->sysctl_secure_tcp; 3958 ipvs->sysctl_snat_reroute = 1; 3959 tbl[idx++].data = &ipvs->sysctl_snat_reroute; 3960 ipvs->sysctl_sync_ver = 1; 3961 tbl[idx++].data = &ipvs->sysctl_sync_ver; 3962 ipvs->sysctl_sync_ports = 1; 3963 tbl[idx++].data = &ipvs->sysctl_sync_ports; 3964 tbl[idx++].data = &ipvs->sysctl_sync_persist_mode; 3965 ipvs->sysctl_sync_qlen_max = nr_free_buffer_pages() / 32; 3966 tbl[idx++].data = &ipvs->sysctl_sync_qlen_max; 3967 ipvs->sysctl_sync_sock_size = 0; 3968 tbl[idx++].data = &ipvs->sysctl_sync_sock_size; 3969 tbl[idx++].data = &ipvs->sysctl_cache_bypass; 3970 tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn; 3971 tbl[idx++].data = &ipvs->sysctl_sloppy_tcp; 3972 tbl[idx++].data = &ipvs->sysctl_sloppy_sctp; 3973 tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template; 3974 ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD; 3975 ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD; 3976 tbl[idx].data = &ipvs->sysctl_sync_threshold; 3977 tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold); 3978 ipvs->sysctl_sync_refresh_period = DEFAULT_SYNC_REFRESH_PERIOD; 3979 tbl[idx++].data = &ipvs->sysctl_sync_refresh_period; 3980 ipvs->sysctl_sync_retries = clamp_t(int, DEFAULT_SYNC_RETRIES, 0, 3); 3981 tbl[idx++].data = &ipvs->sysctl_sync_retries; 3982 tbl[idx++].data = &ipvs->sysctl_nat_icmp_send; 3983 ipvs->sysctl_pmtu_disc = 1; 3984 tbl[idx++].data = &ipvs->sysctl_pmtu_disc; 3985 tbl[idx++].data = &ipvs->sysctl_backup_only; 3986 ipvs->sysctl_conn_reuse_mode = 1; 3987 tbl[idx++].data = &ipvs->sysctl_conn_reuse_mode; 3988 tbl[idx++].data = &ipvs->sysctl_schedule_icmp; 3989 tbl[idx++].data = &ipvs->sysctl_ignore_tunneled; 3990 #ifdef CONFIG_IP_VS_DEBUG 3991 /* Global sysctls must be ro in non-init netns */ 3992 if (!net_eq(net, &init_net)) 3993 tbl[idx++].mode = 0444; 3994 #endif 3995 3996 ipvs->sysctl_hdr = register_net_sysctl(net, "net/ipv4/vs", tbl); 3997 if (ipvs->sysctl_hdr == NULL) { 3998 if (!net_eq(net, &init_net)) 3999 kfree(tbl); 4000 return -ENOMEM; 4001 } 4002 ip_vs_start_estimator(ipvs, &ipvs->tot_stats); 4003 ipvs->sysctl_tbl = tbl; 4004 /* Schedule defense work */ 4005 INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler); 4006 schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD); 4007 4008 return 0; 4009 } 4010 4011 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) 4012 { 4013 struct net *net = ipvs->net; 4014 4015 cancel_delayed_work_sync(&ipvs->defense_work); 4016 cancel_work_sync(&ipvs->defense_work.work); 4017 unregister_net_sysctl_table(ipvs->sysctl_hdr); 4018 ip_vs_stop_estimator(ipvs, &ipvs->tot_stats); 4019 4020 if (!net_eq(net, &init_net)) 4021 kfree(ipvs->sysctl_tbl); 4022 } 4023 4024 #else 4025 4026 static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) { return 0; } 4027 static void __net_exit ip_vs_control_net_cleanup_sysctl(struct netns_ipvs *ipvs) { } 4028 4029 #endif 4030 4031 static struct notifier_block ip_vs_dst_notifier = { 4032 .notifier_call = ip_vs_dst_event, 4033 #ifdef CONFIG_IP_VS_IPV6 4034 .priority = ADDRCONF_NOTIFY_PRIORITY + 5, 4035 #endif 4036 }; 4037 4038 int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) 4039 { 4040 int i, idx; 4041 4042 /* Initialize rs_table */ 4043 for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) 4044 INIT_HLIST_HEAD(&ipvs->rs_table[idx]); 4045 4046 INIT_LIST_HEAD(&ipvs->dest_trash); 4047 spin_lock_init(&ipvs->dest_trash_lock); 4048 setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 4049 (unsigned long) ipvs); 4050 atomic_set(&ipvs->ftpsvc_counter, 0); 4051 atomic_set(&ipvs->nullsvc_counter, 0); 4052 atomic_set(&ipvs->conn_out_counter, 0); 4053 4054 /* procfs stats */ 4055 ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats); 4056 if (!ipvs->tot_stats.cpustats) 4057 return -ENOMEM; 4058 4059 for_each_possible_cpu(i) { 4060 struct ip_vs_cpu_stats *ipvs_tot_stats; 4061 ipvs_tot_stats = per_cpu_ptr(ipvs->tot_stats.cpustats, i); 4062 u64_stats_init(&ipvs_tot_stats->syncp); 4063 } 4064 4065 spin_lock_init(&ipvs->tot_stats.lock); 4066 4067 proc_create("ip_vs", 0, ipvs->net->proc_net, &ip_vs_info_fops); 4068 proc_create("ip_vs_stats", 0, ipvs->net->proc_net, &ip_vs_stats_fops); 4069 proc_create("ip_vs_stats_percpu", 0, ipvs->net->proc_net, 4070 &ip_vs_stats_percpu_fops); 4071 4072 if (ip_vs_control_net_init_sysctl(ipvs)) 4073 goto err; 4074 4075 return 0; 4076 4077 err: 4078 free_percpu(ipvs->tot_stats.cpustats); 4079 return -ENOMEM; 4080 } 4081 4082 void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) 4083 { 4084 ip_vs_trash_cleanup(ipvs); 4085 ip_vs_control_net_cleanup_sysctl(ipvs); 4086 remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); 4087 remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); 4088 remove_proc_entry("ip_vs", ipvs->net->proc_net); 4089 free_percpu(ipvs->tot_stats.cpustats); 4090 } 4091 4092 int __init ip_vs_register_nl_ioctl(void) 4093 { 4094 int ret; 4095 4096 ret = nf_register_sockopt(&ip_vs_sockopts); 4097 if (ret) { 4098 pr_err("cannot register sockopt.\n"); 4099 goto err_sock; 4100 } 4101 4102 ret = ip_vs_genl_register(); 4103 if (ret) { 4104 pr_err("cannot register Generic Netlink interface.\n"); 4105 goto err_genl; 4106 } 4107 return 0; 4108 4109 err_genl: 4110 nf_unregister_sockopt(&ip_vs_sockopts); 4111 err_sock: 4112 return ret; 4113 } 4114 4115 void ip_vs_unregister_nl_ioctl(void) 4116 { 4117 ip_vs_genl_unregister(); 4118 nf_unregister_sockopt(&ip_vs_sockopts); 4119 } 4120 4121 int __init ip_vs_control_init(void) 4122 { 4123 int idx; 4124 int ret; 4125 4126 EnterFunction(2); 4127 4128 /* Initialize svc_table, ip_vs_svc_fwm_table */ 4129 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { 4130 INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); 4131 INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); 4132 } 4133 4134 smp_wmb(); /* Do we really need it now ? */ 4135 4136 ret = register_netdevice_notifier(&ip_vs_dst_notifier); 4137 if (ret < 0) 4138 return ret; 4139 4140 LeaveFunction(2); 4141 return 0; 4142 } 4143 4144 4145 void ip_vs_control_cleanup(void) 4146 { 4147 EnterFunction(2); 4148 unregister_netdevice_notifier(&ip_vs_dst_notifier); 4149 LeaveFunction(2); 4150 } 4151
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.