1 /* 2 * ip_vs_proto.c: transport protocol load balancing support for IPVS 3 * 4 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 5 * Julian Anastasov <ja@ssi.bg> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; either version 10 * 2 of the License, or (at your option) any later version. 11 * 12 * Changes: 13 * 14 */ 15 16 #define KMSG_COMPONENT "IPVS" 17 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt 18 19 #include <linux/module.h> 20 #include <linux/kernel.h> 21 #include <linux/skbuff.h> 22 #include <linux/gfp.h> 23 #include <linux/in.h> 24 #include <linux/ip.h> 25 #include <net/protocol.h> 26 #include <net/tcp.h> 27 #include <net/udp.h> 28 #include <linux/stat.h> 29 #include <linux/proc_fs.h> 30 31 #include <net/ip_vs.h> 32 33 34 /* 35 * IPVS protocols can only be registered/unregistered when the ipvs 36 * module is loaded/unloaded, so no lock is needed in accessing the 37 * ipvs protocol table. 38 */ 39 40 #define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */ 41 #define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1)) 42 43 static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE]; 44 45 46 /* 47 * register an ipvs protocol 48 */ 49 static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) 50 { 51 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 52 53 pp->next = ip_vs_proto_table[hash]; 54 ip_vs_proto_table[hash] = pp; 55 56 if (pp->init != NULL) 57 pp->init(pp); 58 59 return 0; 60 } 61 62 /* 63 * register an ipvs protocols netns related data 64 */ 65 static int 66 register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) 67 { 68 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 69 struct ip_vs_proto_data *pd = 70 kzalloc(sizeof(struct ip_vs_proto_data), GFP_KERNEL); 71 72 if (!pd) 73 return -ENOMEM; 74 75 pd->pp = pp; /* For speed issues */ 76 pd->next = ipvs->proto_data_table[hash]; 77 ipvs->proto_data_table[hash] = pd; 78 atomic_set(&pd->appcnt, 0); /* Init app counter */ 79 80 if (pp->init_netns != NULL) { 81 int ret = pp->init_netns(ipvs, pd); 82 if (ret) { 83 /* unlink an free proto data */ 84 ipvs->proto_data_table[hash] = pd->next; 85 kfree(pd); 86 return ret; 87 } 88 } 89 90 return 0; 91 } 92 93 /* 94 * unregister an ipvs protocol 95 */ 96 static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) 97 { 98 struct ip_vs_protocol **pp_p; 99 unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); 100 101 pp_p = &ip_vs_proto_table[hash]; 102 for (; *pp_p; pp_p = &(*pp_p)->next) { 103 if (*pp_p == pp) { 104 *pp_p = pp->next; 105 if (pp->exit != NULL) 106 pp->exit(pp); 107 return 0; 108 } 109 } 110 111 return -ESRCH; 112 } 113 114 /* 115 * unregister an ipvs protocols netns data 116 */ 117 static int 118 unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) 119 { 120 struct ip_vs_proto_data **pd_p; 121 unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); 122 123 pd_p = &ipvs->proto_data_table[hash]; 124 for (; *pd_p; pd_p = &(*pd_p)->next) { 125 if (*pd_p == pd) { 126 *pd_p = pd->next; 127 if (pd->pp->exit_netns != NULL) 128 pd->pp->exit_netns(ipvs, pd); 129 kfree(pd); 130 return 0; 131 } 132 } 133 134 return -ESRCH; 135 } 136 137 /* 138 * get ip_vs_protocol object by its proto. 139 */ 140 struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) 141 { 142 struct ip_vs_protocol *pp; 143 unsigned int hash = IP_VS_PROTO_HASH(proto); 144 145 for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { 146 if (pp->protocol == proto) 147 return pp; 148 } 149 150 return NULL; 151 } 152 EXPORT_SYMBOL(ip_vs_proto_get); 153 154 /* 155 * get ip_vs_protocol object data by netns and proto 156 */ 157 struct ip_vs_proto_data * 158 ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) 159 { 160 struct ip_vs_proto_data *pd; 161 unsigned int hash = IP_VS_PROTO_HASH(proto); 162 163 for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { 164 if (pd->pp->protocol == proto) 165 return pd; 166 } 167 168 return NULL; 169 } 170 EXPORT_SYMBOL(ip_vs_proto_data_get); 171 172 /* 173 * Propagate event for state change to all protocols 174 */ 175 void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) 176 { 177 struct ip_vs_proto_data *pd; 178 int i; 179 180 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 181 for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { 182 if (pd->pp->timeout_change) 183 pd->pp->timeout_change(pd, flags); 184 } 185 } 186 } 187 188 189 int * 190 ip_vs_create_timeout_table(int *table, int size) 191 { 192 return kmemdup(table, size, GFP_KERNEL); 193 } 194 195 196 const char * ip_vs_state_name(__u16 proto, int state) 197 { 198 struct ip_vs_protocol *pp = ip_vs_proto_get(proto); 199 200 if (pp == NULL || pp->state_name == NULL) 201 return (IPPROTO_IP == proto) ? "NONE" : "ERR!"; 202 return pp->state_name(state); 203 } 204 205 206 static void 207 ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, 208 const struct sk_buff *skb, 209 int offset, 210 const char *msg) 211 { 212 char buf[128]; 213 struct iphdr _iph, *ih; 214 215 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 216 if (ih == NULL) 217 sprintf(buf, "TRUNCATED"); 218 else if (ih->frag_off & htons(IP_OFFSET)) 219 sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); 220 else { 221 __be16 _ports[2], *pptr; 222 223 pptr = skb_header_pointer(skb, offset + ih->ihl*4, 224 sizeof(_ports), _ports); 225 if (pptr == NULL) 226 sprintf(buf, "TRUNCATED %pI4->%pI4", 227 &ih->saddr, &ih->daddr); 228 else 229 sprintf(buf, "%pI4:%u->%pI4:%u", 230 &ih->saddr, ntohs(pptr[0]), 231 &ih->daddr, ntohs(pptr[1])); 232 } 233 234 pr_debug("%s: %s %s\n", msg, pp->name, buf); 235 } 236 237 #ifdef CONFIG_IP_VS_IPV6 238 static void 239 ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, 240 const struct sk_buff *skb, 241 int offset, 242 const char *msg) 243 { 244 char buf[192]; 245 struct ipv6hdr _iph, *ih; 246 247 ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); 248 if (ih == NULL) 249 sprintf(buf, "TRUNCATED"); 250 else if (ih->nexthdr == IPPROTO_FRAGMENT) 251 sprintf(buf, "%pI6c->%pI6c frag", &ih->saddr, &ih->daddr); 252 else { 253 __be16 _ports[2], *pptr; 254 255 pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), 256 sizeof(_ports), _ports); 257 if (pptr == NULL) 258 sprintf(buf, "TRUNCATED %pI6c->%pI6c", 259 &ih->saddr, &ih->daddr); 260 else 261 sprintf(buf, "%pI6c:%u->%pI6c:%u", 262 &ih->saddr, ntohs(pptr[0]), 263 &ih->daddr, ntohs(pptr[1])); 264 } 265 266 pr_debug("%s: %s %s\n", msg, pp->name, buf); 267 } 268 #endif 269 270 271 void 272 ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, 273 const struct sk_buff *skb, 274 int offset, 275 const char *msg) 276 { 277 #ifdef CONFIG_IP_VS_IPV6 278 if (af == AF_INET6) 279 ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); 280 else 281 #endif 282 ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); 283 } 284 285 /* 286 * per network name-space init 287 */ 288 int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) 289 { 290 int i, ret; 291 static struct ip_vs_protocol *protos[] = { 292 #ifdef CONFIG_IP_VS_PROTO_TCP 293 &ip_vs_protocol_tcp, 294 #endif 295 #ifdef CONFIG_IP_VS_PROTO_UDP 296 &ip_vs_protocol_udp, 297 #endif 298 #ifdef CONFIG_IP_VS_PROTO_SCTP 299 &ip_vs_protocol_sctp, 300 #endif 301 #ifdef CONFIG_IP_VS_PROTO_AH 302 &ip_vs_protocol_ah, 303 #endif 304 #ifdef CONFIG_IP_VS_PROTO_ESP 305 &ip_vs_protocol_esp, 306 #endif 307 }; 308 309 for (i = 0; i < ARRAY_SIZE(protos); i++) { 310 ret = register_ip_vs_proto_netns(ipvs, protos[i]); 311 if (ret < 0) 312 goto cleanup; 313 } 314 return 0; 315 316 cleanup: 317 ip_vs_protocol_net_cleanup(ipvs); 318 return ret; 319 } 320 321 void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) 322 { 323 struct ip_vs_proto_data *pd; 324 int i; 325 326 /* unregister all the ipvs proto data for this netns */ 327 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 328 while ((pd = ipvs->proto_data_table[i]) != NULL) 329 unregister_ip_vs_proto_netns(ipvs, pd); 330 } 331 } 332 333 int __init ip_vs_protocol_init(void) 334 { 335 char protocols[64]; 336 #define REGISTER_PROTOCOL(p) \ 337 do { \ 338 register_ip_vs_protocol(p); \ 339 strcat(protocols, ", "); \ 340 strcat(protocols, (p)->name); \ 341 } while (0) 342 343 protocols[0] = '\0'; 344 protocols[2] = '\0'; 345 #ifdef CONFIG_IP_VS_PROTO_TCP 346 REGISTER_PROTOCOL(&ip_vs_protocol_tcp); 347 #endif 348 #ifdef CONFIG_IP_VS_PROTO_UDP 349 REGISTER_PROTOCOL(&ip_vs_protocol_udp); 350 #endif 351 #ifdef CONFIG_IP_VS_PROTO_SCTP 352 REGISTER_PROTOCOL(&ip_vs_protocol_sctp); 353 #endif 354 #ifdef CONFIG_IP_VS_PROTO_AH 355 REGISTER_PROTOCOL(&ip_vs_protocol_ah); 356 #endif 357 #ifdef CONFIG_IP_VS_PROTO_ESP 358 REGISTER_PROTOCOL(&ip_vs_protocol_esp); 359 #endif 360 pr_info("Registered protocols (%s)\n", &protocols[2]); 361 362 return 0; 363 } 364 365 366 void ip_vs_protocol_cleanup(void) 367 { 368 struct ip_vs_protocol *pp; 369 int i; 370 371 /* unregister all the ipvs protocols */ 372 for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { 373 while ((pp = ip_vs_proto_table[i]) != NULL) 374 unregister_ip_vs_protocol(pp); 375 } 376 } 377
Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.