TOMOYO Linux Cross Reference
Linux/net/sched/sch_choke.c


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * net/sched/sch_choke.c        CHOKE scheduler
  4  *
  5  * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
  6  * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
  7  */
  8 
  9 #include <linux/module.h>
 10 #include <linux/types.h>
 11 #include <linux/kernel.h>
 12 #include <linux/skbuff.h>
 13 #include <linux/vmalloc.h>
 14 #include <net/pkt_sched.h>
 15 #include <net/pkt_cls.h>
 16 #include <net/inet_ecn.h>
 17 #include <net/red.h>
 18 #include <net/flow_dissector.h>
 19 
 20 /*
 21    CHOKe stateless AQM for fair bandwidth allocation
 22    =================================================
 23 
 24    CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
 25    unresponsive flows) is a variant of RED that penalizes misbehaving flows but
 26    maintains no flow state. The difference from RED is an additional step
  27    during the enqueuing process. If the average queue size is over the
 28    low threshold (qmin), a packet is chosen at random from the queue.
 29    If both the new and chosen packet are from the same flow, both
 30    are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
  31    needs to access packets in the queue randomly. It has a minimal class
 32    interface to allow overriding the builtin flow classifier with
 33    filters.
 34 
 35    Source:
 36    R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
 37    Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
 38    IEEE INFOCOM, 2000.
 39 
 40    A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
  41    Characteristics", IEEE/ACM Transactions on Networking, 2004.
 42 
 43  */
 44 
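    /*
     * Worked example (numbers are illustrative, not from this file): with
     * qth_min = 5 and qth_max = 15 packets, arrivals are queued normally
     * while qavg <= 5.  Once qavg exceeds 5, each arriving packet is
     * compared with one packet drawn at random from the queue: a heavy,
     * unresponsive flow occupies many slots, so its packets often match and
     * are dropped in pairs, while a light flow rarely matches and mostly
     * sees only RED's probabilistic mark/drop.  Once qavg exceeds 15, every
     * arrival is marked or dropped, exactly as in RED.
     */
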
 45 /* Upper bound on size of sk_buff table (packets) */
 46 #define CHOKE_MAX_QUEUE (128*1024 - 1)
 47 
 48 struct choke_sched_data {
 49 /* Parameters */
 50         u32              limit;
 51         unsigned char    flags;
 52 
 53         struct red_parms parms;
 54 
 55 /* Variables */
 56         struct red_vars  vars;
 57         struct {
 58                 u32     prob_drop;      /* Early probability drops */
 59                 u32     prob_mark;      /* Early probability marks */
 60                 u32     forced_drop;    /* Forced drops, qavg > max_thresh */
 61                 u32     forced_mark;    /* Forced marks, qavg > max_thresh */
 62                 u32     pdrop;          /* Drops due to queue limits */
 63                 u32     other;          /* Drops due to drop() calls */
 64                 u32     matched;        /* Drops due to flow match */
 65         } stats;
 66 
 67         unsigned int     head;
 68         unsigned int     tail;
 69 
 70         unsigned int     tab_mask; /* size - 1 */
 71 
 72         struct sk_buff **tab;
 73 };
 74 
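    /*
     * The packet table is a power-of-two sized array used as a ring buffer:
     * tab_mask is size - 1, so indices wrap with a cheap bitwise AND.  head
     * and tail delimit the occupied region.  Because CHOKe may drop a packet
     * from the middle of the queue, a slot can be left as a NULL "hole" that
     * the dequeue and zap helpers below skip over.
     */
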
 75 /* number of elements in queue including holes */
 76 static unsigned int choke_len(const struct choke_sched_data *q)
 77 {
 78         return (q->tail - q->head) & q->tab_mask;
 79 }
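
    /*
     * The masked subtraction handles wrap-around.  For example (illustrative
     * values), with tab_mask = 7 (a table of 8 slots), head = 5 and tail = 2:
     * (2 - 5) & 7 = 0xfffffffd & 7 = 5, i.e. slots 5, 6, 7, 0 and 1 are
     * counted, holes included.
     */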
 80 
 81 /* Is ECN parameter configured */
 82 static int use_ecn(const struct choke_sched_data *q)
 83 {
 84         return q->flags & TC_RED_ECN;
 85 }
 86 
 87 /* Should packets over max just be dropped (versus marked) */
 88 static int use_harddrop(const struct choke_sched_data *q)
 89 {
 90         return q->flags & TC_RED_HARDDROP;
 91 }
 92 
 93 /* Move head pointer forward to skip over holes */
 94 static void choke_zap_head_holes(struct choke_sched_data *q)
 95 {
 96         do {
 97                 q->head = (q->head + 1) & q->tab_mask;
 98                 if (q->head == q->tail)
 99                         break;
100         } while (q->tab[q->head] == NULL);
101 }
102 
103 /* Move tail pointer backwards to reuse holes */
104 static void choke_zap_tail_holes(struct choke_sched_data *q)
105 {
106         do {
107                 q->tail = (q->tail - 1) & q->tab_mask;
108                 if (q->head == q->tail)
109                         break;
110         } while (q->tab[q->tail] == NULL);
111 }
112 
113 /* Drop packet from queue array by creating a "hole" */
114 static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
115                               struct sk_buff **to_free)
116 {
117         struct choke_sched_data *q = qdisc_priv(sch);
118         struct sk_buff *skb = q->tab[idx];
119 
120         q->tab[idx] = NULL;
121 
122         if (idx == q->head)
123                 choke_zap_head_holes(q);
124         if (idx == q->tail)
125                 choke_zap_tail_holes(q);
126 
127         qdisc_qstats_backlog_dec(sch, skb);
128         qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
129         qdisc_drop(skb, sch, to_free);
130         --sch->q.qlen;
131 }
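
    /*
     * Dropping by index is what distinguishes CHOKe's queue from a plain
     * FIFO: the victim slot is simply set to NULL.  If the hole is at the
     * head or the tail, the zap helpers above advance head (or pull back
     * tail) past any run of NULL slots, so head always points at a real
     * packet while the queue is non-empty.
     */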
132 
133 struct choke_skb_cb {
134         u8                      keys_valid;
135         struct                  flow_keys_digest keys;
136 };
137 
138 static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
139 {
140         qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
141         return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
142 }
143 
144 /*
145  * Compare the flows of two packets.
146  *  Returns true only if the source and destination addresses and ports match,
147  *          false for special cases (e.g. packets of different protocols).
148  */
149 static bool choke_match_flow(struct sk_buff *skb1,
150                              struct sk_buff *skb2)
151 {
152         struct flow_keys temp;
153 
154         if (skb1->protocol != skb2->protocol)
155                 return false;
156 
157         if (!choke_skb_cb(skb1)->keys_valid) {
158                 choke_skb_cb(skb1)->keys_valid = 1;
159                 skb_flow_dissect_flow_keys(skb1, &temp, 0);
160                 make_flow_keys_digest(&choke_skb_cb(skb1)->keys, &temp);
161         }
162 
163         if (!choke_skb_cb(skb2)->keys_valid) {
164                 choke_skb_cb(skb2)->keys_valid = 1;
165                 skb_flow_dissect_flow_keys(skb2, &temp, 0);
166                 make_flow_keys_digest(&choke_skb_cb(skb2)->keys, &temp);
167         }
168 
169         return !memcmp(&choke_skb_cb(skb1)->keys,
170                        &choke_skb_cb(skb2)->keys,
171                        sizeof(choke_skb_cb(skb1)->keys));
172 }
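
    /*
     * Flow comparison uses the generic flow dissector: the first time a
     * packet is examined its flow keys are dissected and condensed into a
     * fixed-size digest cached in the per-skb cb area (keys_valid marks the
     * cache as filled), so later comparisons cost only a memcmp.  Packets of
     * different protocols never match.
     */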
173 
174 /*
175  * Select a packet at random from the queue.
176  * HACK: since the queue can have holes from previous deletions, retry several
177  *   times to find a random skb, then just give up and return the head.
178  * Will return NULL if the queue is empty (q->head == q->tail).
179  */
180 static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
181                                          unsigned int *pidx)
182 {
183         struct sk_buff *skb;
184         int retrys = 3;
185 
186         do {
187                 *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
188                 skb = q->tab[*pidx];
189                 if (skb)
190                         return skb;
191         } while (--retrys > 0);
192 
193         return q->tab[*pidx = q->head];
194 }
195 
196 /*
197  * Compare a new packet with a packet chosen at random from the queue.
198  * Returns true if they are from the same flow; *pidx holds the chosen index.
199  */
200 static bool choke_match_random(const struct choke_sched_data *q,
201                                struct sk_buff *nskb,
202                                unsigned int *pidx)
203 {
204         struct sk_buff *oskb;
205 
206         if (q->head == q->tail)
207                 return false;
208 
209         oskb = choke_peek_random(q, pidx);
210         return choke_match_flow(oskb, nskb);
211 }
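
    /*
     * The random peek is deliberately cheap: it tries at most three random
     * slots and, if all of them are holes, falls back to the head slot,
     * which holds a real packet whenever the queue is non-empty.  This
     * trades a little randomness for bounded, O(1) work per enqueue.
     */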
212 
213 static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch,
214                          struct sk_buff **to_free)
215 {
216         struct choke_sched_data *q = qdisc_priv(sch);
217         const struct red_parms *p = &q->parms;
218 
219         choke_skb_cb(skb)->keys_valid = 0;
220         /* Compute average queue usage (see RED) */
221         q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
222         if (red_is_idling(&q->vars))
223                 red_end_of_idle_period(&q->vars);
224 
225         /* Is queue small? */
226         if (q->vars.qavg <= p->qth_min)
227                 q->vars.qcount = -1;
228         else {
229                 unsigned int idx;
230 
231                 /* Draw a packet at random from queue and compare flow */
232                 if (choke_match_random(q, skb, &idx)) {
233                         q->stats.matched++;
234                         choke_drop_by_idx(sch, idx, to_free);
235                         goto congestion_drop;
236                 }
237 
238                 /* Queue is large, always mark/drop */
239                 if (q->vars.qavg > p->qth_max) {
240                         q->vars.qcount = -1;
241 
242                         qdisc_qstats_overlimit(sch);
243                         if (use_harddrop(q) || !use_ecn(q) ||
244                             !INET_ECN_set_ce(skb)) {
245                                 q->stats.forced_drop++;
246                                 goto congestion_drop;
247                         }
248 
249                         q->stats.forced_mark++;
250                 } else if (++q->vars.qcount) {
251                         if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
252                                 q->vars.qcount = 0;
253                                 q->vars.qR = red_random(p);
254 
255                                 qdisc_qstats_overlimit(sch);
256                                 if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
257                                         q->stats.prob_drop++;
258                                         goto congestion_drop;
259                                 }
260 
261                                 q->stats.prob_mark++;
262                         }
263                 } else
264                         q->vars.qR = red_random(p);
265         }
266 
267         /* Admit new packet */
268         if (sch->q.qlen < q->limit) {
269                 q->tab[q->tail] = skb;
270                 q->tail = (q->tail + 1) & q->tab_mask;
271                 ++sch->q.qlen;
272                 qdisc_qstats_backlog_inc(sch, skb);
273                 return NET_XMIT_SUCCESS;
274         }
275 
276         q->stats.pdrop++;
277         return qdisc_drop(skb, sch, to_free);
278 
279 congestion_drop:
280         qdisc_drop(skb, sch, to_free);
281         return NET_XMIT_CN;
282 }
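
    /*
     * Summary of the enqueue path above: with qavg at or below qth_min the
     * packet is simply queued.  Between the thresholds CHOKe first applies
     * the random-match test (a hit drops both packets) and otherwise falls
     * back to RED's probabilistic mark/drop.  Above qth_max every arrival is
     * marked, or dropped if ECN is not in use or harddrop is set.  CHOKe/RED
     * drops return NET_XMIT_CN; tail drops against the configured limit are
     * counted in pdrop and use qdisc_drop()'s return value.
     */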
283 
284 static struct sk_buff *choke_dequeue(struct Qdisc *sch)
285 {
286         struct choke_sched_data *q = qdisc_priv(sch);
287         struct sk_buff *skb;
288 
289         if (q->head == q->tail) {
290                 if (!red_is_idling(&q->vars))
291                         red_start_of_idle_period(&q->vars);
292                 return NULL;
293         }
294 
295         skb = q->tab[q->head];
296         q->tab[q->head] = NULL;
297         choke_zap_head_holes(q);
298         --sch->q.qlen;
299         qdisc_qstats_backlog_dec(sch, skb);
300         qdisc_bstats_update(sch, skb);
301 
302         return skb;
303 }
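
    /*
     * Dequeue always takes the packet at head; choke_zap_head_holes() then
     * advances head past any holes left by earlier CHOKe drops.  When the
     * queue drains completely, RED's idle-period accounting is started so
     * that qavg keeps decaying while no packets arrive.
     */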
304 
305 static void choke_reset(struct Qdisc *sch)
306 {
307         struct choke_sched_data *q = qdisc_priv(sch);
308 
309         while (q->head != q->tail) {
310                 struct sk_buff *skb = q->tab[q->head];
311 
312                 q->head = (q->head + 1) & q->tab_mask;
313                 if (!skb)
314                         continue;
315                 rtnl_qdisc_drop(skb, sch);
316         }
317 
318         sch->q.qlen = 0;
319         sch->qstats.backlog = 0;
320         if (q->tab)
321                 memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *));
322         q->head = q->tail = 0;
323         red_restart(&q->vars);
324 }
325 
326 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
327         [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
328         [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
329         [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
330 };
331 
332 
333 static void choke_free(void *addr)
334 {
335         kvfree(addr);
336 }
337 
338 static int choke_change(struct Qdisc *sch, struct nlattr *opt,
339                         struct netlink_ext_ack *extack)
340 {
341         struct choke_sched_data *q = qdisc_priv(sch);
342         struct nlattr *tb[TCA_CHOKE_MAX + 1];
343         const struct tc_red_qopt *ctl;
344         int err;
345         struct sk_buff **old = NULL;
346         unsigned int mask;
347         u32 max_P;
348 
349         if (opt == NULL)
350                 return -EINVAL;
351 
352         err = nla_parse_nested_deprecated(tb, TCA_CHOKE_MAX, opt,
353                                           choke_policy, NULL);
354         if (err < 0)
355                 return err;
356 
357         if (tb[TCA_CHOKE_PARMS] == NULL ||
358             tb[TCA_CHOKE_STAB] == NULL)
359                 return -EINVAL;
360 
361         max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
362 
363         ctl = nla_data(tb[TCA_CHOKE_PARMS]);
364 
365         if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
366                 return -EINVAL;
367 
368         if (ctl->limit > CHOKE_MAX_QUEUE)
369                 return -EINVAL;
370 
371         mask = roundup_pow_of_two(ctl->limit + 1) - 1;
372         if (mask != q->tab_mask) {
373                 struct sk_buff **ntab;
374 
375                 ntab = kvcalloc(mask + 1, sizeof(struct sk_buff *), GFP_KERNEL);
376                 if (!ntab)
377                         return -ENOMEM;
378 
379                 sch_tree_lock(sch);
380                 old = q->tab;
381                 if (old) {
382                         unsigned int oqlen = sch->q.qlen, tail = 0;
383                         unsigned dropped = 0;
384 
385                         while (q->head != q->tail) {
386                                 struct sk_buff *skb = q->tab[q->head];
387 
388                                 q->head = (q->head + 1) & q->tab_mask;
389                                 if (!skb)
390                                         continue;
391                                 if (tail < mask) {
392                                         ntab[tail++] = skb;
393                                         continue;
394                                 }
395                                 dropped += qdisc_pkt_len(skb);
396                                 qdisc_qstats_backlog_dec(sch, skb);
397                                 --sch->q.qlen;
398                                 rtnl_qdisc_drop(skb, sch);
399                         }
400                         qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
401                         q->head = 0;
402                         q->tail = tail;
403                 }
404 
405                 q->tab_mask = mask;
406                 q->tab = ntab;
407         } else
408                 sch_tree_lock(sch);
409 
410         q->flags = ctl->flags;
411         q->limit = ctl->limit;
412 
413         red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
414                       ctl->Plog, ctl->Scell_log,
415                       nla_data(tb[TCA_CHOKE_STAB]),
416                       max_P);
417         red_set_vars(&q->vars);
418 
419         if (q->head == q->tail)
420                 red_end_of_idle_period(&q->vars);
421 
422         sch_tree_unlock(sch);
423         choke_free(old);
424         return 0;
425 }
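
    /*
     * choke_change() is also used by choke_init() below.  The table is sized
     * to the next power of two above the configured packet limit, and on a
     * resize the old queue contents are migrated (packets that no longer fit
     * are dropped and the backlog counters adjusted).  From user space the
     * qdisc is configured with tc; a purely illustrative invocation (option
     * names as provided by iproute2's choke support, not defined in this
     * file) might look like:
     *
     *   tc qdisc add dev eth0 root choke limit 1000 bandwidth 10mbit ecn
     *
     * which ends up as the TCA_CHOKE_* netlink attributes parsed above.
     */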
426 
427 static int choke_init(struct Qdisc *sch, struct nlattr *opt,
428                       struct netlink_ext_ack *extack)
429 {
430         return choke_change(sch, opt, extack);
431 }
432 
433 static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
434 {
435         struct choke_sched_data *q = qdisc_priv(sch);
436         struct nlattr *opts = NULL;
437         struct tc_red_qopt opt = {
438                 .limit          = q->limit,
439                 .flags          = q->flags,
440                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
441                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
442                 .Wlog           = q->parms.Wlog,
443                 .Plog           = q->parms.Plog,
444                 .Scell_log      = q->parms.Scell_log,
445         };
446 
447         opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
448         if (opts == NULL)
449                 goto nla_put_failure;
450 
451         if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
452             nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
453                 goto nla_put_failure;
454         return nla_nest_end(skb, opts);
455 
456 nla_put_failure:
457         nla_nest_cancel(skb, opts);
458         return -EMSGSIZE;
459 }
460 
461 static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
462 {
463         struct choke_sched_data *q = qdisc_priv(sch);
464         struct tc_choke_xstats st = {
465                 .early  = q->stats.prob_drop + q->stats.forced_drop,
466                 .marked = q->stats.prob_mark + q->stats.forced_mark,
467                 .pdrop  = q->stats.pdrop,
468                 .other  = q->stats.other,
469                 .matched = q->stats.matched,
470         };
471 
472         return gnet_stats_copy_app(d, &st, sizeof(st));
473 }
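
    /*
     * The exported xstats mirror RED's: "early" sums the probabilistic and
     * forced drops, "marked" the probabilistic and forced ECN marks, "pdrop"
     * counts tail drops against the limit, and "matched" counts packets
     * dropped because they matched a randomly chosen packet of the same
     * flow.  They can be read from user space with "tc -s qdisc show".
     */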
474 
475 static void choke_destroy(struct Qdisc *sch)
476 {
477         struct choke_sched_data *q = qdisc_priv(sch);
478 
479         choke_free(q->tab);
480 }
481 
482 static struct sk_buff *choke_peek_head(struct Qdisc *sch)
483 {
484         struct choke_sched_data *q = qdisc_priv(sch);
485 
486         return (q->head != q->tail) ? q->tab[q->head] : NULL;
487 }
488 
489 static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
490         .id             =       "choke",
491         .priv_size      =       sizeof(struct choke_sched_data),
492 
493         .enqueue        =       choke_enqueue,
494         .dequeue        =       choke_dequeue,
495         .peek           =       choke_peek_head,
496         .init           =       choke_init,
497         .destroy        =       choke_destroy,
498         .reset          =       choke_reset,
499         .change         =       choke_change,
500         .dump           =       choke_dump,
501         .dump_stats     =       choke_dump_stats,
502         .owner          =       THIS_MODULE,
503 };
504 
505 static int __init choke_module_init(void)
506 {
507         return register_qdisc(&choke_qdisc_ops);
508 }
509 
510 static void __exit choke_module_exit(void)
511 {
512         unregister_qdisc(&choke_qdisc_ops);
513 }
514 
515 module_init(choke_module_init)
516 module_exit(choke_module_exit)
517 
518 MODULE_LICENSE("GPL");
519 
