1 // SPDX-License-Identifier: GPL-2.0-only 2 /* Cluster IP hashmark target 3 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> 4 * based on ideas of Fabio Olive Leite <olive@unixforge.org> 5 * 6 * Development of this code funded by SuSE Linux AG, http://www.suse.com/ 7 */ 8 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 9 #include <linux/module.h> 10 #include <linux/proc_fs.h> 11 #include <linux/jhash.h> 12 #include <linux/bitops.h> 13 #include <linux/skbuff.h> 14 #include <linux/slab.h> 15 #include <linux/ip.h> 16 #include <linux/tcp.h> 17 #include <linux/udp.h> 18 #include <linux/icmp.h> 19 #include <linux/if_arp.h> 20 #include <linux/seq_file.h> 21 #include <linux/refcount.h> 22 #include <linux/netfilter_arp.h> 23 #include <linux/netfilter/x_tables.h> 24 #include <linux/netfilter_ipv4/ip_tables.h> 25 #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> 26 #include <net/netfilter/nf_conntrack.h> 27 #include <net/net_namespace.h> 28 #include <net/netns/generic.h> 29 #include <net/checksum.h> 30 #include <net/ip.h> 31 32 #define CLUSTERIP_VERSION "0.8" 33 34 MODULE_LICENSE("GPL"); 35 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); 36 MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); 37 38 struct clusterip_config { 39 struct list_head list; /* list of all configs */ 40 refcount_t refcount; /* reference count */ 41 refcount_t entries; /* number of entries/rules 42 * referencing us */ 43 44 __be32 clusterip; /* the IP address */ 45 u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ 46 int ifindex; /* device ifindex */ 47 u_int16_t num_total_nodes; /* total number of nodes */ 48 unsigned long local_nodes; /* node number array */ 49 50 #ifdef CONFIG_PROC_FS 51 struct proc_dir_entry *pde; /* proc dir entry */ 52 #endif 53 enum clusterip_hashmode hash_mode; /* which hashing mode */ 54 u_int32_t hash_initval; /* hash initialization */ 55 struct rcu_head rcu; /* for call_rcu */ 56 struct net *net; /* netns for pernet list */ 57 char ifname[IFNAMSIZ]; /* device ifname */ 58 }; 59 60 #ifdef CONFIG_PROC_FS 61 static const struct proc_ops clusterip_proc_ops; 62 #endif 63 64 struct clusterip_net { 65 struct list_head configs; 66 /* lock protects the configs list */ 67 spinlock_t lock; 68 69 #ifdef CONFIG_PROC_FS 70 struct proc_dir_entry *procdir; 71 /* mutex protects the config->pde*/ 72 struct mutex mutex; 73 #endif 74 }; 75 76 static unsigned int clusterip_net_id __read_mostly; 77 static inline struct clusterip_net *clusterip_pernet(struct net *net) 78 { 79 return net_generic(net, clusterip_net_id); 80 } 81 82 static inline void 83 clusterip_config_get(struct clusterip_config *c) 84 { 85 refcount_inc(&c->refcount); 86 } 87 88 static void clusterip_config_rcu_free(struct rcu_head *head) 89 { 90 struct clusterip_config *config; 91 struct net_device *dev; 92 93 config = container_of(head, struct clusterip_config, rcu); 94 dev = dev_get_by_name(config->net, config->ifname); 95 if (dev) { 96 dev_mc_del(dev, config->clustermac); 97 dev_put(dev); 98 } 99 kfree(config); 100 } 101 102 static inline void 103 clusterip_config_put(struct clusterip_config *c) 104 { 105 if (refcount_dec_and_test(&c->refcount)) 106 call_rcu(&c->rcu, clusterip_config_rcu_free); 107 } 108 109 /* decrease the count of entries using/referencing this config. If last 110 * entry(rule) is removed, remove the config from lists, but don't free it 111 * yet, since proc-files could still be holding references */ 112 static inline void 113 clusterip_config_entry_put(struct clusterip_config *c) 114 { 115 struct clusterip_net *cn = clusterip_pernet(c->net); 116 117 local_bh_disable(); 118 if (refcount_dec_and_lock(&c->entries, &cn->lock)) { 119 list_del_rcu(&c->list); 120 spin_unlock(&cn->lock); 121 local_bh_enable(); 122 /* In case anyone still accesses the file, the open/close 123 * functions are also incrementing the refcount on their own, 124 * so it's safe to remove the entry even if it's in use. */ 125 #ifdef CONFIG_PROC_FS 126 mutex_lock(&cn->mutex); 127 if (cn->procdir) 128 proc_remove(c->pde); 129 mutex_unlock(&cn->mutex); 130 #endif 131 return; 132 } 133 local_bh_enable(); 134 } 135 136 static struct clusterip_config * 137 __clusterip_config_find(struct net *net, __be32 clusterip) 138 { 139 struct clusterip_config *c; 140 struct clusterip_net *cn = clusterip_pernet(net); 141 142 list_for_each_entry_rcu(c, &cn->configs, list) { 143 if (c->clusterip == clusterip) 144 return c; 145 } 146 147 return NULL; 148 } 149 150 static inline struct clusterip_config * 151 clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) 152 { 153 struct clusterip_config *c; 154 155 rcu_read_lock_bh(); 156 c = __clusterip_config_find(net, clusterip); 157 if (c) { 158 #ifdef CONFIG_PROC_FS 159 if (!c->pde) 160 c = NULL; 161 else 162 #endif 163 if (unlikely(!refcount_inc_not_zero(&c->refcount))) 164 c = NULL; 165 else if (entry) { 166 if (unlikely(!refcount_inc_not_zero(&c->entries))) { 167 clusterip_config_put(c); 168 c = NULL; 169 } 170 } 171 } 172 rcu_read_unlock_bh(); 173 174 return c; 175 } 176 177 static void 178 clusterip_config_init_nodelist(struct clusterip_config *c, 179 const struct ipt_clusterip_tgt_info *i) 180 { 181 int n; 182 183 for (n = 0; n < i->num_local_nodes; n++) 184 set_bit(i->local_nodes[n] - 1, &c->local_nodes); 185 } 186 187 static int 188 clusterip_netdev_event(struct notifier_block *this, unsigned long event, 189 void *ptr) 190 { 191 struct net_device *dev = netdev_notifier_info_to_dev(ptr); 192 struct net *net = dev_net(dev); 193 struct clusterip_net *cn = clusterip_pernet(net); 194 struct clusterip_config *c; 195 196 spin_lock_bh(&cn->lock); 197 list_for_each_entry_rcu(c, &cn->configs, list) { 198 switch (event) { 199 case NETDEV_REGISTER: 200 if (!strcmp(dev->name, c->ifname)) { 201 c->ifindex = dev->ifindex; 202 dev_mc_add(dev, c->clustermac); 203 } 204 break; 205 case NETDEV_UNREGISTER: 206 if (dev->ifindex == c->ifindex) { 207 dev_mc_del(dev, c->clustermac); 208 c->ifindex = -1; 209 } 210 break; 211 case NETDEV_CHANGENAME: 212 if (!strcmp(dev->name, c->ifname)) { 213 c->ifindex = dev->ifindex; 214 dev_mc_add(dev, c->clustermac); 215 } else if (dev->ifindex == c->ifindex) { 216 dev_mc_del(dev, c->clustermac); 217 c->ifindex = -1; 218 } 219 break; 220 } 221 } 222 spin_unlock_bh(&cn->lock); 223 224 return NOTIFY_DONE; 225 } 226 227 static struct clusterip_config * 228 clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i, 229 __be32 ip, const char *iniface) 230 { 231 struct clusterip_net *cn = clusterip_pernet(net); 232 struct clusterip_config *c; 233 struct net_device *dev; 234 int err; 235 236 if (iniface[0] == '\0') { 237 pr_info("Please specify an interface name\n"); 238 return ERR_PTR(-EINVAL); 239 } 240 241 c = kzalloc(sizeof(*c), GFP_ATOMIC); 242 if (!c) 243 return ERR_PTR(-ENOMEM); 244 245 dev = dev_get_by_name(net, iniface); 246 if (!dev) { 247 pr_info("no such interface %s\n", iniface); 248 kfree(c); 249 return ERR_PTR(-ENOENT); 250 } 251 c->ifindex = dev->ifindex; 252 strcpy(c->ifname, dev->name); 253 memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); 254 dev_mc_add(dev, c->clustermac); 255 dev_put(dev); 256 257 c->clusterip = ip; 258 c->num_total_nodes = i->num_total_nodes; 259 clusterip_config_init_nodelist(c, i); 260 c->hash_mode = i->hash_mode; 261 c->hash_initval = i->hash_initval; 262 c->net = net; 263 refcount_set(&c->refcount, 1); 264 265 spin_lock_bh(&cn->lock); 266 if (__clusterip_config_find(net, ip)) { 267 err = -EBUSY; 268 goto out_config_put; 269 } 270 271 list_add_rcu(&c->list, &cn->configs); 272 spin_unlock_bh(&cn->lock); 273 274 #ifdef CONFIG_PROC_FS 275 { 276 char buffer[16]; 277 278 /* create proc dir entry */ 279 sprintf(buffer, "%pI4", &ip); 280 mutex_lock(&cn->mutex); 281 c->pde = proc_create_data(buffer, 0600, 282 cn->procdir, 283 &clusterip_proc_ops, c); 284 mutex_unlock(&cn->mutex); 285 if (!c->pde) { 286 err = -ENOMEM; 287 goto err; 288 } 289 } 290 #endif 291 292 refcount_set(&c->entries, 1); 293 return c; 294 295 #ifdef CONFIG_PROC_FS 296 err: 297 #endif 298 spin_lock_bh(&cn->lock); 299 list_del_rcu(&c->list); 300 out_config_put: 301 spin_unlock_bh(&cn->lock); 302 clusterip_config_put(c); 303 return ERR_PTR(err); 304 } 305 306 #ifdef CONFIG_PROC_FS 307 static int 308 clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) 309 { 310 311 if (nodenum == 0 || 312 nodenum > c->num_total_nodes) 313 return 1; 314 315 /* check if we already have this number in our bitfield */ 316 if (test_and_set_bit(nodenum - 1, &c->local_nodes)) 317 return 1; 318 319 return 0; 320 } 321 322 static bool 323 clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) 324 { 325 if (nodenum == 0 || 326 nodenum > c->num_total_nodes) 327 return true; 328 329 if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) 330 return false; 331 332 return true; 333 } 334 #endif 335 336 static inline u_int32_t 337 clusterip_hashfn(const struct sk_buff *skb, 338 const struct clusterip_config *config) 339 { 340 const struct iphdr *iph = ip_hdr(skb); 341 unsigned long hashval; 342 u_int16_t sport = 0, dport = 0; 343 int poff; 344 345 poff = proto_ports_offset(iph->protocol); 346 if (poff >= 0) { 347 const u_int16_t *ports; 348 u16 _ports[2]; 349 350 ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); 351 if (ports) { 352 sport = ports[0]; 353 dport = ports[1]; 354 } 355 } else { 356 net_info_ratelimited("unknown protocol %u\n", iph->protocol); 357 } 358 359 switch (config->hash_mode) { 360 case CLUSTERIP_HASHMODE_SIP: 361 hashval = jhash_1word(ntohl(iph->saddr), 362 config->hash_initval); 363 break; 364 case CLUSTERIP_HASHMODE_SIP_SPT: 365 hashval = jhash_2words(ntohl(iph->saddr), sport, 366 config->hash_initval); 367 break; 368 case CLUSTERIP_HASHMODE_SIP_SPT_DPT: 369 hashval = jhash_3words(ntohl(iph->saddr), sport, dport, 370 config->hash_initval); 371 break; 372 default: 373 /* to make gcc happy */ 374 hashval = 0; 375 /* This cannot happen, unless the check function wasn't called 376 * at rule load time */ 377 pr_info("unknown mode %u\n", config->hash_mode); 378 BUG(); 379 break; 380 } 381 382 /* node numbers are 1..n, not 0..n */ 383 return reciprocal_scale(hashval, config->num_total_nodes) + 1; 384 } 385 386 static inline int 387 clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) 388 { 389 return test_bit(hash - 1, &config->local_nodes); 390 } 391 392 /*********************************************************************** 393 * IPTABLES TARGET 394 ***********************************************************************/ 395 396 static unsigned int 397 clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) 398 { 399 const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 400 struct nf_conn *ct; 401 enum ip_conntrack_info ctinfo; 402 u_int32_t hash; 403 404 /* don't need to clusterip_config_get() here, since refcount 405 * is only decremented by destroy() - and ip_tables guarantees 406 * that the ->target() function isn't called after ->destroy() */ 407 408 ct = nf_ct_get(skb, &ctinfo); 409 if (ct == NULL) 410 return NF_DROP; 411 412 /* special case: ICMP error handling. conntrack distinguishes between 413 * error messages (RELATED) and information requests (see below) */ 414 if (ip_hdr(skb)->protocol == IPPROTO_ICMP && 415 (ctinfo == IP_CT_RELATED || 416 ctinfo == IP_CT_RELATED_REPLY)) 417 return XT_CONTINUE; 418 419 /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO, 420 * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here 421 * on, which all have an ID field [relevant for hashing]. */ 422 423 hash = clusterip_hashfn(skb, cipinfo->config); 424 425 switch (ctinfo) { 426 case IP_CT_NEW: 427 ct->mark = hash; 428 break; 429 case IP_CT_RELATED: 430 case IP_CT_RELATED_REPLY: 431 /* FIXME: we don't handle expectations at the moment. 432 * They can arrive on a different node than 433 * the master connection (e.g. FTP passive mode) */ 434 case IP_CT_ESTABLISHED: 435 case IP_CT_ESTABLISHED_REPLY: 436 break; 437 default: /* Prevent gcc warnings */ 438 break; 439 } 440 441 #ifdef DEBUG 442 nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); 443 #endif 444 pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); 445 if (!clusterip_responsible(cipinfo->config, hash)) { 446 pr_debug("not responsible\n"); 447 return NF_DROP; 448 } 449 pr_debug("responsible\n"); 450 451 /* despite being received via linklayer multicast, this is 452 * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ 453 skb->pkt_type = PACKET_HOST; 454 455 return XT_CONTINUE; 456 } 457 458 static int clusterip_tg_check(const struct xt_tgchk_param *par) 459 { 460 struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 461 const struct ipt_entry *e = par->entryinfo; 462 struct clusterip_config *config; 463 int ret, i; 464 465 if (par->nft_compat) { 466 pr_err("cannot use CLUSTERIP target from nftables compat\n"); 467 return -EOPNOTSUPP; 468 } 469 470 if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && 471 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && 472 cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { 473 pr_info("unknown mode %u\n", cipinfo->hash_mode); 474 return -EINVAL; 475 476 } 477 if (e->ip.dmsk.s_addr != htonl(0xffffffff) || 478 e->ip.dst.s_addr == 0) { 479 pr_info("Please specify destination IP\n"); 480 return -EINVAL; 481 } 482 if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) { 483 pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes); 484 return -EINVAL; 485 } 486 for (i = 0; i < cipinfo->num_local_nodes; i++) { 487 if (cipinfo->local_nodes[i] - 1 >= 488 sizeof(config->local_nodes) * 8) { 489 pr_info("bad local_nodes[%d] %u\n", 490 i, cipinfo->local_nodes[i]); 491 return -EINVAL; 492 } 493 } 494 495 config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); 496 if (!config) { 497 if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { 498 pr_info("no config found for %pI4, need 'new'\n", 499 &e->ip.dst.s_addr); 500 return -EINVAL; 501 } else { 502 config = clusterip_config_init(par->net, cipinfo, 503 e->ip.dst.s_addr, 504 e->ip.iniface); 505 if (IS_ERR(config)) 506 return PTR_ERR(config); 507 } 508 } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) 509 return -EINVAL; 510 511 ret = nf_ct_netns_get(par->net, par->family); 512 if (ret < 0) { 513 pr_info("cannot load conntrack support for proto=%u\n", 514 par->family); 515 clusterip_config_entry_put(config); 516 clusterip_config_put(config); 517 return ret; 518 } 519 520 if (!par->net->xt.clusterip_deprecated_warning) { 521 pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, " 522 "use xt_cluster instead\n"); 523 par->net->xt.clusterip_deprecated_warning = true; 524 } 525 526 cipinfo->config = config; 527 return ret; 528 } 529 530 /* drop reference count of cluster config when rule is deleted */ 531 static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) 532 { 533 const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; 534 535 /* if no more entries are referencing the config, remove it 536 * from the list and destroy the proc entry */ 537 clusterip_config_entry_put(cipinfo->config); 538 539 clusterip_config_put(cipinfo->config); 540 541 nf_ct_netns_put(par->net, par->family); 542 } 543 544 #ifdef CONFIG_COMPAT 545 struct compat_ipt_clusterip_tgt_info 546 { 547 u_int32_t flags; 548 u_int8_t clustermac[6]; 549 u_int16_t num_total_nodes; 550 u_int16_t num_local_nodes; 551 u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; 552 u_int32_t hash_mode; 553 u_int32_t hash_initval; 554 compat_uptr_t config; 555 }; 556 #endif /* CONFIG_COMPAT */ 557 558 static struct xt_target clusterip_tg_reg __read_mostly = { 559 .name = "CLUSTERIP", 560 .family = NFPROTO_IPV4, 561 .target = clusterip_tg, 562 .checkentry = clusterip_tg_check, 563 .destroy = clusterip_tg_destroy, 564 .targetsize = sizeof(struct ipt_clusterip_tgt_info), 565 .usersize = offsetof(struct ipt_clusterip_tgt_info, config), 566 #ifdef CONFIG_COMPAT 567 .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), 568 #endif /* CONFIG_COMPAT */ 569 .me = THIS_MODULE 570 }; 571 572 573 /*********************************************************************** 574 * ARP MANGLING CODE 575 ***********************************************************************/ 576 577 /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ 578 struct arp_payload { 579 u_int8_t src_hw[ETH_ALEN]; 580 __be32 src_ip; 581 u_int8_t dst_hw[ETH_ALEN]; 582 __be32 dst_ip; 583 } __packed; 584 585 #ifdef DEBUG 586 static void arp_print(struct arp_payload *payload) 587 { 588 #define HBUFFERLEN 30 589 char hbuffer[HBUFFERLEN]; 590 int j, k; 591 592 for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) { 593 hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); 594 hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); 595 hbuffer[k++] = ':'; 596 } 597 hbuffer[--k] = '\0'; 598 599 pr_debug("src %pI4@%s, dst %pI4\n", 600 &payload->src_ip, hbuffer, &payload->dst_ip); 601 } 602 #endif 603 604 static unsigned int 605 arp_mangle(void *priv, 606 struct sk_buff *skb, 607 const struct nf_hook_state *state) 608 { 609 struct arphdr *arp = arp_hdr(skb); 610 struct arp_payload *payload; 611 struct clusterip_config *c; 612 struct net *net = state->net; 613 614 /* we don't care about non-ethernet and non-ipv4 ARP */ 615 if (arp->ar_hrd != htons(ARPHRD_ETHER) || 616 arp->ar_pro != htons(ETH_P_IP) || 617 arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) 618 return NF_ACCEPT; 619 620 /* we only want to mangle arp requests and replies */ 621 if (arp->ar_op != htons(ARPOP_REPLY) && 622 arp->ar_op != htons(ARPOP_REQUEST)) 623 return NF_ACCEPT; 624 625 payload = (void *)(arp+1); 626 627 /* if there is no clusterip configuration for the arp reply's 628 * source ip, we don't want to mangle it */ 629 c = clusterip_config_find_get(net, payload->src_ip, 0); 630 if (!c) 631 return NF_ACCEPT; 632 633 /* normally the linux kernel always replies to arp queries of 634 * addresses on different interfacs. However, in the CLUSTERIP case 635 * this wouldn't work, since we didn't subscribe the mcast group on 636 * other interfaces */ 637 if (c->ifindex != state->out->ifindex) { 638 pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n", 639 c->ifindex, state->out->ifindex); 640 clusterip_config_put(c); 641 return NF_ACCEPT; 642 } 643 644 /* mangle reply hardware address */ 645 memcpy(payload->src_hw, c->clustermac, arp->ar_hln); 646 647 #ifdef DEBUG 648 pr_debug("mangled arp reply: "); 649 arp_print(payload); 650 #endif 651 652 clusterip_config_put(c); 653 654 return NF_ACCEPT; 655 } 656 657 static const struct nf_hook_ops cip_arp_ops = { 658 .hook = arp_mangle, 659 .pf = NFPROTO_ARP, 660 .hooknum = NF_ARP_OUT, 661 .priority = -1 662 }; 663 664 /*********************************************************************** 665 * PROC DIR HANDLING 666 ***********************************************************************/ 667 668 #ifdef CONFIG_PROC_FS 669 670 struct clusterip_seq_position { 671 unsigned int pos; /* position */ 672 unsigned int weight; /* number of bits set == size */ 673 unsigned int bit; /* current bit */ 674 unsigned long val; /* current value */ 675 }; 676 677 static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) 678 { 679 struct clusterip_config *c = s->private; 680 unsigned int weight; 681 u_int32_t local_nodes; 682 struct clusterip_seq_position *idx; 683 684 /* FIXME: possible race */ 685 local_nodes = c->local_nodes; 686 weight = hweight32(local_nodes); 687 if (*pos >= weight) 688 return NULL; 689 690 idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL); 691 if (!idx) 692 return ERR_PTR(-ENOMEM); 693 694 idx->pos = *pos; 695 idx->weight = weight; 696 idx->bit = ffs(local_nodes); 697 idx->val = local_nodes; 698 clear_bit(idx->bit - 1, &idx->val); 699 700 return idx; 701 } 702 703 static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) 704 { 705 struct clusterip_seq_position *idx = v; 706 707 *pos = ++idx->pos; 708 if (*pos >= idx->weight) { 709 kfree(v); 710 return NULL; 711 } 712 idx->bit = ffs(idx->val); 713 clear_bit(idx->bit - 1, &idx->val); 714 return idx; 715 } 716 717 static void clusterip_seq_stop(struct seq_file *s, void *v) 718 { 719 if (!IS_ERR(v)) 720 kfree(v); 721 } 722 723 static int clusterip_seq_show(struct seq_file *s, void *v) 724 { 725 struct clusterip_seq_position *idx = v; 726 727 if (idx->pos != 0) 728 seq_putc(s, ','); 729 730 seq_printf(s, "%u", idx->bit); 731 732 if (idx->pos == idx->weight - 1) 733 seq_putc(s, '\n'); 734 735 return 0; 736 } 737 738 static const struct seq_operations clusterip_seq_ops = { 739 .start = clusterip_seq_start, 740 .next = clusterip_seq_next, 741 .stop = clusterip_seq_stop, 742 .show = clusterip_seq_show, 743 }; 744 745 static int clusterip_proc_open(struct inode *inode, struct file *file) 746 { 747 int ret = seq_open(file, &clusterip_seq_ops); 748 749 if (!ret) { 750 struct seq_file *sf = file->private_data; 751 struct clusterip_config *c = PDE_DATA(inode); 752 753 sf->private = c; 754 755 clusterip_config_get(c); 756 } 757 758 return ret; 759 } 760 761 static int clusterip_proc_release(struct inode *inode, struct file *file) 762 { 763 struct clusterip_config *c = PDE_DATA(inode); 764 int ret; 765 766 ret = seq_release(inode, file); 767 768 if (!ret) 769 clusterip_config_put(c); 770 771 return ret; 772 } 773 774 static ssize_t clusterip_proc_write(struct file *file, const char __user *input, 775 size_t size, loff_t *ofs) 776 { 777 struct clusterip_config *c = PDE_DATA(file_inode(file)); 778 #define PROC_WRITELEN 10 779 char buffer[PROC_WRITELEN+1]; 780 unsigned long nodenum; 781 int rc; 782 783 if (size > PROC_WRITELEN) 784 return -EIO; 785 if (copy_from_user(buffer, input, size)) 786 return -EFAULT; 787 buffer[size] = 0; 788 789 if (*buffer == '+') { 790 rc = kstrtoul(buffer+1, 10, &nodenum); 791 if (rc) 792 return rc; 793 if (clusterip_add_node(c, nodenum)) 794 return -ENOMEM; 795 } else if (*buffer == '-') { 796 rc = kstrtoul(buffer+1, 10, &nodenum); 797 if (rc) 798 return rc; 799 if (clusterip_del_node(c, nodenum)) 800 return -ENOENT; 801 } else 802 return -EIO; 803 804 return size; 805 } 806 807 static const struct proc_ops clusterip_proc_ops = { 808 .proc_open = clusterip_proc_open, 809 .proc_read = seq_read, 810 .proc_write = clusterip_proc_write, 811 .proc_lseek = seq_lseek, 812 .proc_release = clusterip_proc_release, 813 }; 814 815 #endif /* CONFIG_PROC_FS */ 816 817 static int clusterip_net_init(struct net *net) 818 { 819 struct clusterip_net *cn = clusterip_pernet(net); 820 int ret; 821 822 INIT_LIST_HEAD(&cn->configs); 823 824 spin_lock_init(&cn->lock); 825 826 ret = nf_register_net_hook(net, &cip_arp_ops); 827 if (ret < 0) 828 return ret; 829 830 #ifdef CONFIG_PROC_FS 831 cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); 832 if (!cn->procdir) { 833 nf_unregister_net_hook(net, &cip_arp_ops); 834 pr_err("Unable to proc dir entry\n"); 835 return -ENOMEM; 836 } 837 mutex_init(&cn->mutex); 838 #endif /* CONFIG_PROC_FS */ 839 840 return 0; 841 } 842 843 static void clusterip_net_exit(struct net *net) 844 { 845 #ifdef CONFIG_PROC_FS 846 struct clusterip_net *cn = clusterip_pernet(net); 847 848 mutex_lock(&cn->mutex); 849 proc_remove(cn->procdir); 850 cn->procdir = NULL; 851 mutex_unlock(&cn->mutex); 852 #endif 853 nf_unregister_net_hook(net, &cip_arp_ops); 854 } 855 856 static struct pernet_operations clusterip_net_ops = { 857 .init = clusterip_net_init, 858 .exit = clusterip_net_exit, 859 .id = &clusterip_net_id, 860 .size = sizeof(struct clusterip_net), 861 }; 862 863 static struct notifier_block cip_netdev_notifier = { 864 .notifier_call = clusterip_netdev_event 865 }; 866 867 static int __init clusterip_tg_init(void) 868 { 869 int ret; 870 871 ret = register_pernet_subsys(&clusterip_net_ops); 872 if (ret < 0) 873 return ret; 874 875 ret = xt_register_target(&clusterip_tg_reg); 876 if (ret < 0) 877 goto cleanup_subsys; 878 879 ret = register_netdevice_notifier(&cip_netdev_notifier); 880 if (ret < 0) 881 goto unregister_target; 882 883 pr_info("ClusterIP Version %s loaded successfully\n", 884 CLUSTERIP_VERSION); 885 886 return 0; 887 888 unregister_target: 889 xt_unregister_target(&clusterip_tg_reg); 890 cleanup_subsys: 891 unregister_pernet_subsys(&clusterip_net_ops); 892 return ret; 893 } 894 895 static void __exit clusterip_tg_exit(void) 896 { 897 pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); 898 899 unregister_netdevice_notifier(&cip_netdev_notifier); 900 xt_unregister_target(&clusterip_tg_reg); 901 unregister_pernet_subsys(&clusterip_net_ops); 902 903 /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */ 904 rcu_barrier(); 905 } 906 907 module_init(clusterip_tg_init); 908 module_exit(clusterip_tg_exit); 909
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.