~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/include/linux/netfilter/ipset/ip_set_ahash.h

Version: ~ [ linux-5.5-rc7 ] ~ [ linux-5.4.13 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.97 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.166 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.210 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.210 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.81 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 #ifndef _IP_SET_AHASH_H
  2 #define _IP_SET_AHASH_H
  3 
  4 #include <linux/rcupdate.h>
  5 #include <linux/jhash.h>
  6 #include <linux/netfilter/ipset/ip_set_timeout.h>
  7 
/* Token pasting helpers: TOKEN() expands its arguments first, so
 * TOKEN(TYPE, PF, _x) builds identifiers such as <type><pf>_x. */
#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* The "next" slot in struct ip_set_hash holds a full element
 * (TYPE PF _elem); type_pf_add() stores into it via type_pf_data_next()
 * when an add fails with -EAGAIN, as temporary storage for uadd. */
#define type_pf_next            TOKEN(TYPE, PF, _elem)
 12 
 13 /* Hashing which uses arrays to resolve clashing. The hash table is resized
 14  * (doubled) when searching becomes too long.
 15  * Internally jhash is used with the assumption that the size of the
 16  * stored data is a multiple of sizeof(u32). If storage supports timeout,
 17  * the timeout field must be the last one in the data structure - that field
 18  * is ignored when computing the hash key.
 19  *
 20  * Readers and resizing
 21  *
 22  * Resizing can be triggered by userspace command only, and those
 23  * are serialized by the nfnl mutex. During resizing the set is
 24  * read-locked, so the only possible concurrent operations are
 25  * the kernel side readers. Those must be protected by proper RCU locking.
 26  */
 27 
/* Number of elements to store in an initial array block */
#define AHASH_INIT_SIZE                 4
/* Max number of elements to store in an array block */
#define AHASH_MAX_SIZE                  (3*AHASH_INIT_SIZE)

/* Max number of elements can be tuned */
#ifdef IP_SET_HASH_WITH_MULTI
#define AHASH_MAX(h)                    ((h)->ahash_max)

/* Grow the per-bucket element limit by AHASH_INIT_SIZE once lookups
 * report enough clashing entries (multi >= curr), capped at 64.
 * Returns the new limit, or curr unchanged when growth is not allowed. */
static inline u8
tune_ahash_max(u8 curr, u32 multi)
{
        u32 n;

        if (multi < curr)
                return curr;

        n = curr + AHASH_INIT_SIZE;
        /* Currently, at listing one hash bucket must fit into a message.
         * Therefore we have a hard limit here.
         */
        return n > curr && n <= 64 ? n : curr;
}
#define TUNE_AHASH_MAX(h, multi)        \
        ((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
#else
#define AHASH_MAX(h)                    AHASH_MAX_SIZE
#define TUNE_AHASH_MAX(h, multi)
#endif
 57 
/* A hash bucket: clashing elements are kept in a growable array.
 * value points to size elements of type_pf_elem (or type_pf_telem in
 * the timeout flavour); slots [0, pos) are occupied. */
struct hbucket {
        void *value;            /* the array of the values */
        u8 size;                /* size of the array */
        u8 pos;                 /* position of the first free entry */
};
 64 
/* The hash table: the table size stored here in order to make resizing easy */
struct htable {
        u8 htable_bits;         /* size of hash table == 2^htable_bits */
        struct hbucket bucket[0]; /* hashtable buckets; old-style (GNU)
                                   * flexible array, allocated together
                                   * with the header by ip_set_alloc() */
};

/* Address of the ith bucket of table h */
#define hbucket(h, i)           (&((h)->bucket[i]))
 72 
/* Book-keeping of the prefixes added to the set; add_cidr() keeps the
 * array sorted with larger cidr values (more specific prefixes) first. */
struct ip_set_hash_nets {
        u8 cidr;                /* the different cidr values in the set */
        u32 nets;               /* number of elements per cidr */
};
 78 
/* The generic ip_set hash structure; the trailing conditional fields
 * exist only for set types compiled with the matching feature macro. */
struct ip_set_hash {
        struct htable *table;   /* the hash table */
        u32 maxelem;            /* max elements in the hash */
        u32 elements;           /* current element (vs timeout) */
        u32 initval;            /* random jhash init value */
        u32 timeout;            /* timeout value, if enabled */
        struct timer_list gc;   /* garbage collection when timeout enabled */
        struct type_pf_next next; /* temporary storage for uadd */
#ifdef IP_SET_HASH_WITH_MULTI
        u8 ahash_max;           /* max elements in an array block */
#endif
#ifdef IP_SET_HASH_WITH_NETMASK
        u8 netmask;             /* netmask value for subnets to store */
#endif
#ifdef IP_SET_HASH_WITH_RBTREE
        struct rb_root rbtree;
#endif
#ifdef IP_SET_HASH_WITH_NETS
        struct ip_set_hash_nets nets[0]; /* book-keeping of prefixes */
#endif
};
101 
102 static size_t
103 htable_size(u8 hbits)
104 {
105         size_t hsize;
106 
107         /* We must fit both into u32 in jhash and size_t */
108         if (hbits > 31)
109                 return 0;
110         hsize = jhash_size(hbits);
111         if ((((size_t)-1) - sizeof(struct htable))/sizeof(struct hbucket)
112             < hsize)
113                 return 0;
114 
115         return hsize * sizeof(struct hbucket) + sizeof(struct htable);
116 }
117 
118 /* Compute htable_bits from the user input parameter hashsize */
119 static u8
120 htable_bits(u32 hashsize)
121 {
122         /* Assume that hashsize == 2^htable_bits */
123         u8 bits = fls(hashsize - 1);
124         if (jhash_size(bits) != hashsize)
125                 /* Round up to the first 2^n value */
126                 bits = fls(hashsize);
127 
128         return bits;
129 }
130 
131 #ifdef IP_SET_HASH_WITH_NETS
#ifdef IP_SET_HASH_WITH_NETS_PACKED
/* When cidr is packed with nomatch, cidr - 1 is stored in the entry.
 * The argument is parenthesized so expressions expand safely. */
#define CIDR(cidr)      ((cidr) + 1)
#else
#define CIDR(cidr)      (cidr)
#endif

/* Host mask width in bits for the set's address family */
#define SET_HOST_MASK(family)   ((family) == AF_INET ? 32 : 128)
140 
/* Network cidr size book keeping when the hash stores different
 * sized networks */
/* Account one more element of the given prefix length: bump the per-cidr
 * counter and, for the first element of that size, insert the cidr value
 * into h->nets[].cidr, which is kept sorted in decreasing order
 * (larger cidr, i.e. more specific prefix, first). */
static void
add_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        ++h->nets[cidr-1].nets;

        pr_debug("add_cidr added %u: %u\n", cidr, h->nets[cidr-1].nets);

        /* Not the first element of this size: ordering already holds */
        if (h->nets[cidr-1].nets > 1)
                return;

        /* New cidr size */
        for (i = 0; i < host_mask && h->nets[i].cidr; i++) {
                /* Add in increasing prefix order, so larger cidr first */
                if (h->nets[i].cidr < cidr)
                        swap(h->nets[i].cidr, cidr);
        }
        /* cidr now holds the smallest value; append it after the rest */
        if (i < host_mask)
                h->nets[i].cidr = cidr;
}
164 
/* Un-account one element of the given prefix length; when the last
 * element of that size goes away, remove the cidr value from the sorted
 * h->nets[].cidr list by shifting the following entries up. */
static void
del_cidr(struct ip_set_hash *h, u8 cidr, u8 host_mask)
{
        u8 i;

        --h->nets[cidr-1].nets;

        pr_debug("del_cidr deleted %u: %u\n", cidr, h->nets[cidr-1].nets);

        if (h->nets[cidr-1].nets != 0)
                return;

        /* All entries with this cidr size deleted, so cleanup h->cidr[] */
        for (i = 0; i < host_mask - 1 && h->nets[i].cidr; i++) {
                if (h->nets[i].cidr == cidr)
                        h->nets[i].cidr = cidr = h->nets[i+1].cidr;
        }
        /* NOTE(review): if the loop exits with i == 0 this indexes
         * h->nets[-1]; that can only happen when nets[0].cidr is already 0,
         * which should be impossible while an element is being deleted —
         * confirm. */
        h->nets[i - 1].cidr = 0;
}
184 #endif
185 
186 /* Destroy the hashtable part of the set */
187 static void
188 ahash_destroy(struct htable *t)
189 {
190         struct hbucket *n;
191         u32 i;
192 
193         for (i = 0; i < jhash_size(t->htable_bits); i++) {
194                 n = hbucket(t, i);
195                 if (n->size)
196                         /* FIXME: use slab cache */
197                         kfree(n->value);
198         }
199 
200         ip_set_free(t);
201 }
202 
203 /* Calculate the actual memory size of the set data */
204 static size_t
205 ahash_memsize(const struct ip_set_hash *h, size_t dsize, u8 host_mask)
206 {
207         u32 i;
208         struct htable *t = h->table;
209         size_t memsize = sizeof(*h)
210                          + sizeof(*t)
211 #ifdef IP_SET_HASH_WITH_NETS
212                          + sizeof(struct ip_set_hash_nets) * host_mask
213 #endif
214                          + jhash_size(t->htable_bits) * sizeof(struct hbucket);
215 
216         for (i = 0; i < jhash_size(t->htable_bits); i++)
217                         memsize += t->bucket[i].size * dsize;
218 
219         return memsize;
220 }
221 
222 /* Flush a hash type of set: destroy all elements */
223 static void
224 ip_set_hash_flush(struct ip_set *set)
225 {
226         struct ip_set_hash *h = set->data;
227         struct htable *t = h->table;
228         struct hbucket *n;
229         u32 i;
230 
231         for (i = 0; i < jhash_size(t->htable_bits); i++) {
232                 n = hbucket(t, i);
233                 if (n->size) {
234                         n->size = n->pos = 0;
235                         /* FIXME: use slab cache */
236                         kfree(n->value);
237                 }
238         }
239 #ifdef IP_SET_HASH_WITH_NETS
240         memset(h->nets, 0, sizeof(struct ip_set_hash_nets)
241                            * SET_HOST_MASK(set->family));
242 #endif
243         h->elements = 0;
244 }
245 
/* Destroy a hash type of set */
static void
ip_set_hash_destroy(struct ip_set *set)
{
        struct ip_set_hash *h = set->data;

        /* Stop the garbage-collection timer first, so it cannot run
         * against the table we are about to free */
        if (with_timeout(h->timeout))
                del_timer_sync(&h->gc);

        ahash_destroy(h->table);
#ifdef IP_SET_HASH_WITH_RBTREE
        rbtree_destroy(&h->rbtree);
#endif
        kfree(h);

        set->data = NULL;
}
263 
264 #endif /* _IP_SET_AHASH_H */
265 
/* Number of bytes of an element fed to the hash; defaults to the whole
 * untimed element. Per the file header comment this must be a multiple
 * of sizeof(u32), as jhash2() consumes u32 words. */
#ifndef HKEY_DATALEN
#define HKEY_DATALEN    sizeof(struct type_pf_elem)
#endif

/* Bucket index of an element in a table of 2^htable_bits buckets */
#define HKEY(data, initval, htable_bits)                        \
(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)       \
        & jhash_mask(htable_bits))
273 
/* The token pasting helpers are (identically) re-defined here so the
 * template part below is self-contained: everything past the include
 * guard's #endif is meant to be included repeatedly with different
 * TYPE/PF settings. */
#define CONCAT(a, b, c)         a##b##c
#define TOKEN(a, b, c)          CONCAT(a, b, c)

/* Type/family dependent function prototypes */

/* Element accessors and manipulators */
#define type_pf_data_equal      TOKEN(TYPE, PF, _data_equal)
#define type_pf_data_isnull     TOKEN(TYPE, PF, _data_isnull)
#define type_pf_data_copy       TOKEN(TYPE, PF, _data_copy)
#define type_pf_data_zero_out   TOKEN(TYPE, PF, _data_zero_out)
#define type_pf_data_netmask    TOKEN(TYPE, PF, _data_netmask)
#define type_pf_data_list       TOKEN(TYPE, PF, _data_list)
#define type_pf_data_tlist      TOKEN(TYPE, PF, _data_tlist)
#define type_pf_data_next       TOKEN(TYPE, PF, _data_next)
#define type_pf_data_flags      TOKEN(TYPE, PF, _data_flags)
#ifdef IP_SET_HASH_WITH_NETS
#define type_pf_data_match      TOKEN(TYPE, PF, _data_match)
#else
/* Without nets there is no nomatch flag: every found element matches */
#define type_pf_data_match(d)   1
#endif

/* Element types and timeout accessors */
#define type_pf_elem            TOKEN(TYPE, PF, _elem)
#define type_pf_telem           TOKEN(TYPE, PF, _telem)
#define type_pf_data_timeout    TOKEN(TYPE, PF, _data_timeout)
#define type_pf_data_expired    TOKEN(TYPE, PF, _data_expired)
#define type_pf_data_timeout_set TOKEN(TYPE, PF, _data_timeout_set)

/* add/del/test operations, flavour without timeout */
#define type_pf_elem_add        TOKEN(TYPE, PF, _elem_add)
#define type_pf_add             TOKEN(TYPE, PF, _add)
#define type_pf_del             TOKEN(TYPE, PF, _del)
#define type_pf_test_cidrs      TOKEN(TYPE, PF, _test_cidrs)
#define type_pf_test            TOKEN(TYPE, PF, _test)

/* add/del/test operations, flavour with timeout support */
#define type_pf_elem_tadd       TOKEN(TYPE, PF, _elem_tadd)
#define type_pf_del_telem       TOKEN(TYPE, PF, _ahash_del_telem)
#define type_pf_expire          TOKEN(TYPE, PF, _expire)
#define type_pf_tadd            TOKEN(TYPE, PF, _tadd)
#define type_pf_tdel            TOKEN(TYPE, PF, _tdel)
#define type_pf_ttest_cidrs     TOKEN(TYPE, PF, _ahash_ttest_cidrs)
#define type_pf_ttest           TOKEN(TYPE, PF, _ahash_ttest)

/* Whole-set operations; flush and destroy are shared with the generic
 * implementations above */
#define type_pf_resize          TOKEN(TYPE, PF, _resize)
#define type_pf_tresize         TOKEN(TYPE, PF, _tresize)
#define type_pf_flush           ip_set_hash_flush
#define type_pf_destroy         ip_set_hash_destroy
#define type_pf_head            TOKEN(TYPE, PF, _head)
#define type_pf_list            TOKEN(TYPE, PF, _list)
#define type_pf_tlist           TOKEN(TYPE, PF, _tlist)
#define type_pf_same_set        TOKEN(TYPE, PF, _same_set)
#define type_pf_kadt            TOKEN(TYPE, PF, _kadt)
#define type_pf_uadt            TOKEN(TYPE, PF, _uadt)
#define type_pf_gc              TOKEN(TYPE, PF, _gc)
#define type_pf_gc_init         TOKEN(TYPE, PF, _gc_init)
#define type_pf_variant         TOKEN(TYPE, PF, _variant)
#define type_pf_tvariant        TOKEN(TYPE, PF, _tvariant)
328 
329 /* Flavour without timeout */
330 
/* Get the ith element from the array block n (untimed flavour:
 * slots are struct type_pf_elem) */
#define ahash_data(n, i)        \
        ((struct type_pf_elem *)((n)->value) + (i))
334 
335 /* Add an element to the hash table when resizing the set:
336  * we spare the maintenance of the internal counters. */
337 static int
338 type_pf_elem_add(struct hbucket *n, const struct type_pf_elem *value,
339                  u8 ahash_max, u32 cadt_flags)
340 {
341         struct type_pf_elem *data;
342 
343         if (n->pos >= n->size) {
344                 void *tmp;
345 
346                 if (n->size >= ahash_max)
347                         /* Trigger rehashing */
348                         return -EAGAIN;
349 
350                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
351                               * sizeof(struct type_pf_elem),
352                               GFP_ATOMIC);
353                 if (!tmp)
354                         return -ENOMEM;
355                 if (n->size) {
356                         memcpy(tmp, n->value,
357                                sizeof(struct type_pf_elem) * n->size);
358                         kfree(n->value);
359                 }
360                 n->value = tmp;
361                 n->size += AHASH_INIT_SIZE;
362         }
363         data = ahash_data(n, n->pos++);
364         type_pf_data_copy(data, value);
365 #ifdef IP_SET_HASH_WITH_NETS
366         /* Resizing won't overwrite stored flags */
367         if (cadt_flags)
368                 type_pf_data_flags(data, cadt_flags);
369 #endif
370         return 0;
371 }
372 
/* Resize a hash: create a new hash table with doubling the hashsize
 * and inserting the elements to it. Repeat until we succeed or
 * fail due to memory pressures.
 * Note: "retried" is unused in this (untimed) flavour; the parameter is
 * kept so the function matches the common resize prototype. */
static int
type_pf_resize(struct ip_set *set, bool retried)
{
        struct ip_set_hash *h = set->data;
        struct htable *t, *orig = h->table;
        u8 htable_bits = orig->htable_bits;
        const struct type_pf_elem *data;
        struct hbucket *n, *m;
        u32 i, j;
        int ret;

retry:
        ret = 0;
        htable_bits++;
        pr_debug("attempt to resize set %s from %u to %u, t %p\n",
                 set->name, orig->htable_bits, htable_bits, orig);
        /* htable_bits is u8: zero here means the increment wrapped */
        if (!htable_bits) {
                /* In case we have plenty of memory :-) */
                pr_warning("Cannot increase the hashsize of set %s further\n",
                           set->name);
                return -IPSET_ERR_HASH_FULL;
        }
        t = ip_set_alloc(sizeof(*t)
                         + jhash_size(htable_bits) * sizeof(struct hbucket));
        if (!t)
                return -ENOMEM;
        t->htable_bits = htable_bits;

        /* Kernel-side readers may run concurrently; the set is only
         * read-locked while elements are re-inserted into the new table */
        read_lock_bh(&set->lock);
        for (i = 0; i < jhash_size(orig->htable_bits); i++) {
                n = hbucket(orig, i);
                for (j = 0; j < n->pos; j++) {
                        data = ahash_data(n, j);
                        m = hbucket(t, HKEY(data, h->initval, htable_bits));
                        ret = type_pf_elem_add(m, data, AHASH_MAX(h), 0);
                        if (ret < 0) {
                                read_unlock_bh(&set->lock);
                                ahash_destroy(t);
                                /* A bucket of the new table overflowed:
                                 * double the size once more */
                                if (ret == -EAGAIN)
                                        goto retry;
                                return ret;
                        }
                }
        }

        /* Publish the new table, then wait out ongoing RCU readers of
         * the old one before freeing it */
        rcu_assign_pointer(h->table, t);
        read_unlock_bh(&set->lock);

        /* Give time to other readers of the set */
        synchronize_rcu_bh();

        pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
                 orig->htable_bits, orig, t->htable_bits, t);
        ahash_destroy(orig);

        return 0;
}
433 
434 static inline void
435 type_pf_data_next(struct ip_set_hash *h, const struct type_pf_elem *d);
436 
/* Add an element to a hash and update the internal counters when succeeded,
 * otherwise report the proper error code. */
static int
type_pf_add(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i, ret = 0;
        u32 key, multi = 0;
        u32 cadt_flags = flags >> 16;   /* upper 16 bits carry cadt flags */

        /* NOTE(review): the full-set check precedes the duplicate lookup,
         * so re-adding an existing element to a full set reports "full"
         * rather than "exists" — confirm this is intended. */
        if (h->elements >= h->maxelem) {
                if (net_ratelimit())
                        pr_warning("Set %s is full, maxelem %u reached\n",
                                   set->name, h->maxelem);
                return -IPSET_ERR_HASH_FULL;
        }

        /* The table pointer may be replaced by a concurrent resize:
         * dereference it under RCU */
        rcu_read_lock_bh();
        t = rcu_dereference_bh(h->table);
        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++)
                if (type_pf_data_equal(ahash_data(n, i), d, &multi)) {
#ifdef IP_SET_HASH_WITH_NETS
                        if (flags & IPSET_FLAG_EXIST)
                                /* Support overwriting just the flags */
                                type_pf_data_flags(ahash_data(n, i),
                                                   cadt_flags);
#endif
                        ret = -IPSET_ERR_EXIST;
                        goto out;
                }
        TUNE_AHASH_MAX(h, multi);
        ret = type_pf_elem_add(n, value, AHASH_MAX(h), cadt_flags);
        if (ret != 0) {
                /* On bucket overflow remember this element so the retry
                 * after resizing can continue from it */
                if (ret == -EAGAIN)
                        type_pf_data_next(h, d);
                goto out;
        }

#ifdef IP_SET_HASH_WITH_NETS
        add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
        h->elements++;
out:
        rcu_read_unlock_bh();
        return ret;
}
488 
/* Delete an element from the hash: swap it with the last element
 * and free up space if possible.
 */
static int
type_pf_del(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
        struct ip_set_hash *h = set->data;
        struct htable *t = h->table;
        const struct type_pf_elem *d = value;
        struct hbucket *n;
        int i;
        struct type_pf_elem *data;
        u32 key, multi = 0;

        key = HKEY(value, h->initval, t->htable_bits);
        n = hbucket(t, key);
        for (i = 0; i < n->pos; i++) {
                data = ahash_data(n, i);
                if (!type_pf_data_equal(data, d, &multi))
                        continue;
                if (i != n->pos - 1)
                        /* Not last one */
                        type_pf_data_copy(data, ahash_data(n, n->pos - 1));

                n->pos--;
                h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
                del_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
                /* Shrink the bucket array when a whole block is spare.
                 * The element is deleted either way, so an allocation
                 * failure here is silently tolerated. */
                if (n->pos + AHASH_INIT_SIZE < n->size) {
                        void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
                                            * sizeof(struct type_pf_elem),
                                            GFP_ATOMIC);
                        if (!tmp)
                                return 0;
                        n->size -= AHASH_INIT_SIZE;
                        memcpy(tmp, n->value,
                               n->size * sizeof(struct type_pf_elem));
                        kfree(n->value);
                        n->value = tmp;
                }
                return 0;
        }

        return -IPSET_ERR_EXIST;
}
535 
536 #ifdef IP_SET_HASH_WITH_NETS
537 
538 /* Special test function which takes into account the different network
539  * sizes added to the set */
540 static int
541 type_pf_test_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
542 {
543         struct ip_set_hash *h = set->data;
544         struct htable *t = h->table;
545         struct hbucket *n;
546         const struct type_pf_elem *data;
547         int i, j = 0;
548         u32 key, multi = 0;
549         u8 host_mask = SET_HOST_MASK(set->family);
550 
551         pr_debug("test by nets\n");
552         for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
553                 type_pf_data_netmask(d, h->nets[j].cidr);
554                 key = HKEY(d, h->initval, t->htable_bits);
555                 n = hbucket(t, key);
556                 for (i = 0; i < n->pos; i++) {
557                         data = ahash_data(n, i);
558                         if (type_pf_data_equal(data, d, &multi))
559                                 return type_pf_data_match(data);
560                 }
561         }
562         return 0;
563 }
564 #endif
565 
566 /* Test whether the element is added to the set */
567 static int
568 type_pf_test(struct ip_set *set, void *value, u32 timeout, u32 flags)
569 {
570         struct ip_set_hash *h = set->data;
571         struct htable *t = h->table;
572         struct type_pf_elem *d = value;
573         struct hbucket *n;
574         const struct type_pf_elem *data;
575         int i;
576         u32 key, multi = 0;
577 
578 #ifdef IP_SET_HASH_WITH_NETS
579         /* If we test an IP address and not a network address,
580          * try all possible network sizes */
581         if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
582                 return type_pf_test_cidrs(set, d, timeout);
583 #endif
584 
585         key = HKEY(d, h->initval, t->htable_bits);
586         n = hbucket(t, key);
587         for (i = 0; i < n->pos; i++) {
588                 data = ahash_data(n, i);
589                 if (type_pf_data_equal(data, d, &multi))
590                         return type_pf_data_match(data);
591         }
592         return 0;
593 }
594 
/* Reply a HEADER request: fill out the header part of the set */
static int
type_pf_head(struct ip_set *set, struct sk_buff *skb)
{
        const struct ip_set_hash *h = set->data;
        struct nlattr *nested;
        size_t memsize;

        /* The element size depends on whether timeouts are stored */
        read_lock_bh(&set->lock);
        memsize = ahash_memsize(h, with_timeout(h->timeout)
                                        ? sizeof(struct type_pf_telem)
                                        : sizeof(struct type_pf_elem),
                                set->family == AF_INET ? 32 : 128);
        read_unlock_bh(&set->lock);

        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
        if (!nested)
                goto nla_put_failure;
        if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE,
                          htonl(jhash_size(h->table->htable_bits))) ||
            nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem)))
                goto nla_put_failure;
#ifdef IP_SET_HASH_WITH_NETMASK
        /* Report the netmask only when it differs from the default */
        if (h->netmask != HOST_MASK &&
            nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask))
                goto nla_put_failure;
#endif
        if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref - 1)) ||
            nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) ||
            (with_timeout(h->timeout) &&
             nla_put_net32(skb, IPSET_ATTR_TIMEOUT, htonl(h->timeout))))
                goto nla_put_failure;
        ipset_nest_end(skb, nested);

        return 0;
nla_put_failure:
        return -EMSGSIZE;
}
633 
/* Reply a LIST/SAVE request: dump the elements of the specified set */
static int
type_pf_list(const struct ip_set *set,
             struct sk_buff *skb, struct netlink_callback *cb)
{
        const struct ip_set_hash *h = set->data;
        const struct htable *t = h->table;
        struct nlattr *atd, *nested;
        const struct hbucket *n;
        const struct type_pf_elem *data;
        u32 first = cb->args[2];        /* bucket this dump round started at */
        /* We assume that one hash bucket fills into one page */
        void *incomplete;
        int i;

        atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
        if (!atd)
                return -EMSGSIZE;
        pr_debug("list hash set %s\n", set->name);
        /* cb->args[2] is the resume cursor: dumping restarts at that
         * bucket in the next netlink callback round */
        for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
                incomplete = skb_tail_pointer(skb);
                n = hbucket(t, cb->args[2]);
                pr_debug("cb->args[2]: %lu, t %p n %p\n", cb->args[2], t, n);
                for (i = 0; i < n->pos; i++) {
                        data = ahash_data(n, i);
                        pr_debug("list hash %lu hbucket %p i %u, data %p\n",
                                 cb->args[2], n, i, data);
                        nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
                        if (!nested) {
                                if (cb->args[2] == first) {
                                        /* Not even one bucket fitted:
                                         * give up instead of looping */
                                        nla_nest_cancel(skb, atd);
                                        return -EMSGSIZE;
                                } else
                                        goto nla_put_failure;
                        }
                        if (type_pf_data_list(skb, data))
                                goto nla_put_failure;
                        ipset_nest_end(skb, nested);
                }
        }
        ipset_nest_end(skb, atd);
        /* Set listing finished */
        cb->args[2] = 0;

        return 0;

nla_put_failure:
        /* Message full: drop the partially dumped bucket and resume
         * from it in the next callback round */
        nlmsg_trim(skb, incomplete);
        ipset_nest_end(skb, atd);
        if (unlikely(first == cb->args[2])) {
                pr_warning("Can't list set %s: one bucket does not fit into "
                           "a message. Please report it!\n", set->name);
                cb->args[2] = 0;
                return -EMSGSIZE;
        }
        return 0;
}
691 
692 static int
693 type_pf_kadt(struct ip_set *set, const struct sk_buff * skb,
694              const struct xt_action_param *par,
695              enum ipset_adt adt, const struct ip_set_adt_opt *opt);
696 static int
697 type_pf_uadt(struct ip_set *set, struct nlattr *tb[],
698              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
699 
/* Operation table of the flavour without timeout: wires this template's
 * functions into the generic ip_set variant interface */
static const struct ip_set_type_variant type_pf_variant = {
        .kadt   = type_pf_kadt,
        .uadt   = type_pf_uadt,
        .adt    = {
                [IPSET_ADD] = type_pf_add,
                [IPSET_DEL] = type_pf_del,
                [IPSET_TEST] = type_pf_test,
        },
        .destroy = type_pf_destroy,
        .flush  = type_pf_flush,
        .head   = type_pf_head,
        .list   = type_pf_list,
        .resize = type_pf_resize,
        .same_set = type_pf_same_set,
};
715 
716 /* Flavour with timeout support */
717 
/* Get the ith element from the array block n, timeout flavour: slots are
 * struct type_pf_telem. Parenthesized as a whole — matching ahash_data()
 * — so the expansion is safe inside any enclosing expression. */
#define ahash_tdata(n, i) \
        ((struct type_pf_elem *)((struct type_pf_telem *)((n)->value) + (i)))
720 
/* Read the raw timeout field of a timeout-flavoured element */
static inline u32
type_pf_data_timeout(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return tdata->timeout;
}
729 
/* Return true when the element's timeout has already elapsed */
static inline bool
type_pf_data_expired(const struct type_pf_elem *data)
{
        const struct type_pf_telem *tdata =
                (const struct type_pf_telem *) data;

        return ip_set_timeout_expired(tdata->timeout);
}
738 
/* Store a (converted) timeout value into the element */
static inline void
type_pf_data_timeout_set(struct type_pf_elem *data, u32 timeout)
{
        struct type_pf_telem *tdata = (struct type_pf_telem *) data;

        tdata->timeout = ip_set_timeout_set(timeout);
}
746 
747 static int
748 type_pf_elem_tadd(struct hbucket *n, const struct type_pf_elem *value,
749                   u8 ahash_max, u32 cadt_flags, u32 timeout)
750 {
751         struct type_pf_elem *data;
752 
753         if (n->pos >= n->size) {
754                 void *tmp;
755 
756                 if (n->size >= ahash_max)
757                         /* Trigger rehashing */
758                         return -EAGAIN;
759 
760                 tmp = kzalloc((n->size + AHASH_INIT_SIZE)
761                               * sizeof(struct type_pf_telem),
762                               GFP_ATOMIC);
763                 if (!tmp)
764                         return -ENOMEM;
765                 if (n->size) {
766                         memcpy(tmp, n->value,
767                                sizeof(struct type_pf_telem) * n->size);
768                         kfree(n->value);
769                 }
770                 n->value = tmp;
771                 n->size += AHASH_INIT_SIZE;
772         }
773         data = ahash_tdata(n, n->pos++);
774         type_pf_data_copy(data, value);
775         type_pf_data_timeout_set(data, timeout);
776 #ifdef IP_SET_HASH_WITH_NETS
777         /* Resizing won't overwrite stored flags */
778         if (cadt_flags)
779                 type_pf_data_flags(data, cadt_flags);
780 #endif
781         return 0;
782 }
783 
784 /* Delete expired elements from the hashtable */
785 static void
786 type_pf_expire(struct ip_set_hash *h)
787 {
788         struct htable *t = h->table;
789         struct hbucket *n;
790         struct type_pf_elem *data;
791         u32 i;
792         int j;
793 
794         for (i = 0; i < jhash_size(t->htable_bits); i++) {
795                 n = hbucket(t, i);
796                 for (j = 0; j < n->pos; j++) {
797                         data = ahash_tdata(n, j);
798                         if (type_pf_data_expired(data)) {
799                                 pr_debug("expired %u/%u\n", i, j);
800 #ifdef IP_SET_HASH_WITH_NETS
801                                 del_cidr(h, CIDR(data->cidr), HOST_MASK);
802 #endif
803                                 if (j != n->pos - 1)
804                                         /* Not last one */
805                                         type_pf_data_copy(data,
806                                                 ahash_tdata(n, n->pos - 1));
807                                 n->pos--;
808                                 h->elements--;
809                         }
810                 }
811                 if (n->pos + AHASH_INIT_SIZE < n->size) {
812                         void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
813                                             * sizeof(struct type_pf_telem),
814                                             GFP_ATOMIC);
815                         if (!tmp)
816                                 /* Still try to delete expired elements */
817                                 continue;
818                         n->size -= AHASH_INIT_SIZE;
819                         memcpy(tmp, n->value,
820                                n->size * sizeof(struct type_pf_telem));
821                         kfree(n->value);
822                         n->value = tmp;
823                 }
824         }
825 }
826 
/* Grow the hash table of a timeout-enabled set: double the number of
 * buckets and rehash every element into the new table, then publish
 * the new table via RCU and free the old one.
 *
 * Returns 0 on success (also when expiring alone freed up room on the
 * first, non-retried call), -ENOMEM if the new table cannot be
 * allocated, -IPSET_ERR_HASH_FULL when htable_bits cannot be
 * increased any further, or a negative error from type_pf_elem_tadd.
 */
static int
type_pf_tresize(struct ip_set *set, bool retried)
{
	struct ip_set_hash *h = set->data;
	struct htable *t, *orig = h->table;
	u8 htable_bits = orig->htable_bits;
	const struct type_pf_elem *data;
	struct hbucket *n, *m;
	u32 i, j;
	int ret;

	/* Try to cleanup once: if garbage collection removed at least
	 * one element, report success without growing the table. */
	if (!retried) {
		i = h->elements;	/* snapshot of the element count */
		write_lock_bh(&set->lock);
		type_pf_expire(set->data);
		write_unlock_bh(&set->lock);
		if (h->elements <  i)
			return 0;
	}

retry:
	ret = 0;
	htable_bits++;
	/* u8 wrapped to zero: no bigger table is representable */
	if (!htable_bits) {
		/* In case we have plenty of memory :-) */
		pr_warning("Cannot increase the hashsize of set %s further\n",
			   set->name);
		return -IPSET_ERR_HASH_FULL;
	}
	t = ip_set_alloc(sizeof(*t)
			 + jhash_size(htable_bits) * sizeof(struct hbucket));
	if (!t)
		return -ENOMEM;
	t->htable_bits = htable_bits;

	/* Read lock: lookups on the old table may proceed; mutators are
	 * assumed to take the write lock (as type_pf_gc does) — confirm
	 * against the uadt/kadt callers. */
	read_lock_bh(&set->lock);
	for (i = 0; i < jhash_size(orig->htable_bits); i++) {
		n = hbucket(orig, i);
		for (j = 0; j < n->pos; j++) {
			data = ahash_tdata(n, j);
			m = hbucket(t, HKEY(data, h->initval, htable_bits));
			/* cadt_flags 0: elem_tadd only overwrites stored
			 * flags when a non-zero value is passed */
			ret = type_pf_elem_tadd(m, data, AHASH_MAX(h), 0,
						type_pf_data_timeout(data));
			if (ret < 0) {
				read_unlock_bh(&set->lock);
				ahash_destroy(t);
				if (ret == -EAGAIN)
					/* A bucket of the new table got
					 * full: grow even further */
					goto retry;
				return ret;
			}
		}
	}

	/* Publish the new table, then wait for concurrent readers of
	 * the old one before destroying it */
	rcu_assign_pointer(h->table, t);
	read_unlock_bh(&set->lock);

	/* Give time to other readers of the set */
	synchronize_rcu_bh();

	ahash_destroy(orig);

	return 0;
}
891 
/* Add an element to a timeout-enabled hash set.
 *
 * Returns 0 on success, -IPSET_ERR_EXIST if the element already
 * exists unexpired (and IPSET_FLAG_EXIST was not given),
 * -IPSET_ERR_HASH_FULL when maxelem is reached even after expiring,
 * or the error from type_pf_elem_tadd (-EAGAIN requests a resize).
 */
static int
type_pf_tadd(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	struct type_pf_elem *data;
	/* j == AHASH_MAX(h) + 1 means "no reusable slot found yet" */
	int ret = 0, i, j = AHASH_MAX(h) + 1;
	bool flag_exist = flags & IPSET_FLAG_EXIST;
	u32 key, multi = 0;
	u32 cadt_flags = flags >> 16;	/* extension flags in upper 16 bits */

	if (h->elements >= h->maxelem)
		/* FIXME: when set is full, we slow down here */
		type_pf_expire(h);
	if (h->elements >= h->maxelem) {
		if (net_ratelimit())
			pr_warning("Set %s is full, maxelem %u reached\n",
				   set->name, h->maxelem);
		return -IPSET_ERR_HASH_FULL;
	}

	rcu_read_lock_bh();
	/* Re-read the table: a concurrent resize may have swapped it */
	t = rcu_dereference_bh(h->table);
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d, &multi)) {
			if (type_pf_data_expired(data) || flag_exist)
				/* Just timeout value may be updated */
				j = i;
			else {
				ret = -IPSET_ERR_EXIST;
				goto out;
			}
		} else if (j == AHASH_MAX(h) + 1 &&
			   type_pf_data_expired(data))
			/* Remember the first expired slot for reuse */
			j = i;
	}
	if (j != AHASH_MAX(h) + 1) {
		/* Overwrite the matching or expired slot in place */
		data = ahash_tdata(n, j);
#ifdef IP_SET_HASH_WITH_NETS
		/* Swap the per-cidr accounting from old to new element */
		del_cidr(h, CIDR(data->cidr), HOST_MASK);
		add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
		type_pf_data_copy(data, d);
		type_pf_data_timeout_set(data, timeout);
#ifdef IP_SET_HASH_WITH_NETS
		type_pf_data_flags(data, cadt_flags);
#endif
		goto out;
	}
	TUNE_AHASH_MAX(h, multi);
	ret = type_pf_elem_tadd(n, d, AHASH_MAX(h), cadt_flags, timeout);
	if (ret != 0) {
		if (ret == -EAGAIN)
			/* Bucket full: presumably records the element to
			 * continue with after the resize — confirm against
			 * the type-specific type_pf_data_next */
			type_pf_data_next(h, d);
		goto out;
	}

#ifdef IP_SET_HASH_WITH_NETS
	add_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
	h->elements++;
out:
	rcu_read_unlock_bh();
	return ret;
}
962 
/* Delete an element from a timeout-enabled hash set.
 *
 * Returns 0 on success and -IPSET_ERR_EXIST when the element is not
 * in the set or has already expired.  The timeout and flags
 * parameters are unused here (kept for the common adt signature).
 */
static int
type_pf_tdel(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	const struct type_pf_elem *d = value;
	struct hbucket *n;
	int i;
	struct type_pf_elem *data;
	u32 key, multi = 0;

	key = HKEY(value, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (!type_pf_data_equal(data, d, &multi))
			continue;
		if (type_pf_data_expired(data))
			return -IPSET_ERR_EXIST;
		/* Delete by moving the bucket's last element into the
		 * hole and shrinking the bucket */
		if (i != n->pos - 1)
			/* Not last one */
			type_pf_data_copy(data, ahash_tdata(n, n->pos - 1));

		n->pos--;
		h->elements--;
#ifdef IP_SET_HASH_WITH_NETS
		del_cidr(h, CIDR(d->cidr), HOST_MASK);
#endif
		/* Give back memory if a whole chunk became free; if the
		 * smaller array cannot be allocated, keep the old one —
		 * the deletion itself already succeeded */
		if (n->pos + AHASH_INIT_SIZE < n->size) {
			void *tmp = kzalloc((n->size - AHASH_INIT_SIZE)
					    * sizeof(struct type_pf_telem),
					    GFP_ATOMIC);
			if (!tmp)
				return 0;
			n->size -= AHASH_INIT_SIZE;
			memcpy(tmp, n->value,
			       n->size * sizeof(struct type_pf_telem));
			kfree(n->value);
			n->value = tmp;
		}
		return 0;
	}

	return -IPSET_ERR_EXIST;
}
1008 
#ifdef IP_SET_HASH_WITH_NETS
/* Test a plain (host) address against a network-aware set: try every
 * prefix length currently stored in the set (h->nets[]), masking the
 * element with each cidr before hashing and comparing.
 *
 * Returns the (non-zero) match result or 0 when nothing matched.
 * The timeout parameter is unused here.
 */
static int
type_pf_ttest_cidrs(struct ip_set *set, struct type_pf_elem *d, u32 timeout)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data;
	struct hbucket *n;
	int i, j = 0;
	u32 key, multi = 0;
	u8 host_mask = SET_HOST_MASK(set->family);

	/* h->nets[] holds the cidrs in use; a zero cidr ends the list.
	 * multi becoming non-zero stops the scan early. */
	for (; j < host_mask && h->nets[j].cidr && !multi; j++) {
		type_pf_data_netmask(d, h->nets[j].cidr);
		key = HKEY(d, h->initval, t->htable_bits);
		n = hbucket(t, key);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
#ifdef IP_SET_HASH_WITH_MULTI
			/* With multi-matching an expired hit must reset
			 * multi so further cidrs are still examined */
			if (type_pf_data_equal(data, d, &multi)) {
				if (!type_pf_data_expired(data))
					return type_pf_data_match(data);
				multi = 0;
			}
#else
			if (type_pf_data_equal(data, d, &multi) &&
			    !type_pf_data_expired(data))
				return type_pf_data_match(data);
#endif
		}
	}
	return 0;
}
#endif
1043 
/* Test whether an element is in a timeout-enabled hash set.
 *
 * Returns the match result from type_pf_data_match for an unexpired
 * hit, 0 otherwise.  The timeout and flags parameters are unused.
 */
static int
type_pf_ttest(struct ip_set *set, void *value, u32 timeout, u32 flags)
{
	struct ip_set_hash *h = set->data;
	struct htable *t = h->table;
	struct type_pf_elem *data, *d = value;
	struct hbucket *n;
	int i;
	u32 key, multi = 0;

#ifdef IP_SET_HASH_WITH_NETS
	/* A host address (cidr == host mask) must be tried against
	 * every network size stored in the set */
	if (CIDR(d->cidr) == SET_HOST_MASK(set->family))
		return type_pf_ttest_cidrs(set, d, timeout);
#endif
	key = HKEY(d, h->initval, t->htable_bits);
	n = hbucket(t, key);
	for (i = 0; i < n->pos; i++) {
		data = ahash_tdata(n, i);
		if (type_pf_data_equal(data, d, &multi) &&
		    !type_pf_data_expired(data))
			return type_pf_data_match(data);
	}
	return 0;
}
1068 
/* Dump the unexpired elements of the set to userspace via netlink.
 *
 * cb->args[2] is the dump cursor: the index of the next hash bucket
 * to list, preserved across invocations when the skb fills up.
 * Returns 0 (possibly with a partial dump to be continued) or
 * -EMSGSIZE when not even the first pending bucket fits.
 */
static int
type_pf_tlist(const struct ip_set *set,
	      struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct ip_set_hash *h = set->data;
	const struct htable *t = h->table;
	struct nlattr *atd, *nested;
	const struct hbucket *n;
	const struct type_pf_elem *data;
	u32 first = cb->args[2];
	/* We assume that one hash bucket fills into one page */
	void *incomplete;
	int i;

	atd = ipset_nest_start(skb, IPSET_ATTR_ADT);
	if (!atd)
		return -EMSGSIZE;
	for (; cb->args[2] < jhash_size(t->htable_bits); cb->args[2]++) {
		/* Remember the tail so a partially emitted bucket can
		 * be trimmed off and re-dumped next time */
		incomplete = skb_tail_pointer(skb);
		n = hbucket(t, cb->args[2]);
		for (i = 0; i < n->pos; i++) {
			data = ahash_tdata(n, i);
			pr_debug("list %p %u\n", n, i);
			if (type_pf_data_expired(data))
				continue;
			pr_debug("do list %p %u\n", n, i);
			nested = ipset_nest_start(skb, IPSET_ATTR_DATA);
			if (!nested) {
				if (cb->args[2] == first) {
					/* Nothing listed yet at all:
					 * give up on this message */
					nla_nest_cancel(skb, atd);
					return -EMSGSIZE;
				} else
					goto nla_put_failure;
			}
			if (type_pf_data_tlist(skb, data))
				goto nla_put_failure;
			ipset_nest_end(skb, nested);
		}
	}
	ipset_nest_end(skb, atd);
	/* Set listing finished */
	cb->args[2] = 0;

	return 0;

nla_put_failure:
	/* Drop the partially written bucket; it will be re-dumped in
	 * the next message starting from cb->args[2] */
	nlmsg_trim(skb, incomplete);
	ipset_nest_end(skb, atd);
	if (unlikely(first == cb->args[2])) {
		pr_warning("Can't list set %s: one bucket does not fit into "
			   "a message. Please report it!\n", set->name);
		cb->args[2] = 0;
		return -EMSGSIZE;
	}
	return 0;
}
1125 
/* Method table for the timeout-enabled variant of the set type.
 * kadt/uadt/destroy/flush/head/same_set are shared with the
 * non-timeout variant; add/del/test/list/resize use the t* versions
 * defined above. */
static const struct ip_set_type_variant type_pf_tvariant = {
	.kadt	= type_pf_kadt,
	.uadt	= type_pf_uadt,
	.adt	= {
		[IPSET_ADD] = type_pf_tadd,
		[IPSET_DEL] = type_pf_tdel,
		[IPSET_TEST] = type_pf_ttest,
	},
	.destroy = type_pf_destroy,
	.flush	= type_pf_flush,
	.head	= type_pf_head,
	.list	= type_pf_tlist,
	.resize	= type_pf_tresize,
	.same_set = type_pf_same_set,
};
1141 
/* Garbage-collector timer callback: expire stale elements under the
 * set write lock, then re-arm the timer for the next period. */
static void
type_pf_gc(unsigned long ul_set)
{
	struct ip_set *set = (struct ip_set *) ul_set;
	struct ip_set_hash *h = set->data;

	pr_debug("called\n");
	write_lock_bh(&set->lock);
	type_pf_expire(h);
	write_unlock_bh(&set->lock);

	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
}
1156 
/* Set up and start the periodic garbage-collector timer for the set.
 * Uses the pre-4.15 timer API (init_timer + data/function fields);
 * the interval is derived from the set's default timeout. */
static void
type_pf_gc_init(struct ip_set *set)
{
	struct ip_set_hash *h = set->data;

	init_timer(&h->gc);
	h->gc.data = (unsigned long) set;
	h->gc.function = type_pf_gc;
	h->gc.expires = jiffies + IPSET_GC_PERIOD(h->timeout) * HZ;
	add_timer(&h->gc);
	pr_debug("gc initialized, run in every %u\n",
		 IPSET_GC_PERIOD(h->timeout));
}
1170 
/* Undefine every per-type macro so this header can be included again
 * with a different TYPE/PF/HKEY_DATALEN combination. */

/* hashing and per-element data operations */
#undef HKEY_DATALEN
#undef HKEY
#undef type_pf_data_equal
#undef type_pf_data_isnull
#undef type_pf_data_copy
#undef type_pf_data_zero_out
#undef type_pf_data_netmask
#undef type_pf_data_list
#undef type_pf_data_tlist
#undef type_pf_data_next
#undef type_pf_data_flags
#undef type_pf_data_match

/* element types and timeout accessors */
#undef type_pf_elem
#undef type_pf_telem
#undef type_pf_data_timeout
#undef type_pf_data_expired
#undef type_pf_data_timeout_set

/* non-timeout variant functions */
#undef type_pf_elem_add
#undef type_pf_add
#undef type_pf_del
#undef type_pf_test_cidrs
#undef type_pf_test

/* timeout variant functions */
#undef type_pf_elem_tadd
#undef type_pf_del_telem
#undef type_pf_expire
#undef type_pf_tadd
#undef type_pf_tdel
#undef type_pf_ttest_cidrs
#undef type_pf_ttest

/* shared set operations and variant tables */
#undef type_pf_resize
#undef type_pf_tresize
#undef type_pf_flush
#undef type_pf_destroy
#undef type_pf_head
#undef type_pf_list
#undef type_pf_tlist
#undef type_pf_same_set
#undef type_pf_kadt
#undef type_pf_uadt
#undef type_pf_gc
#undef type_pf_gc_init
#undef type_pf_variant
#undef type_pf_tvariant
1218 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp