// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * net/sched/sch_red.c  Random Early Detection queue.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914: computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:  ECN support
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/inet_ecn.h>
#include <net/red.h>

/*      Parameters, settable by user:
        -----------------------------

        limit           - bytes (must be > qth_max + burst)

        Hard limit on queue length; it should be chosen > qth_max
        to allow packet bursts. This parameter does not
        affect the algorithm's behaviour and can be chosen
        arbitrarily high (well, less than RAM size).
        Really, this limit will never be reached
        if RED works correctly.
 */

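/* For reference, a RED instance like this one is typically configured from
 * userspace with iproute2's tc; the values below are illustrative only:
 *
 *   tc qdisc add dev eth0 root red limit 400000 min 30000 max 90000 \
 *           avpkt 1000 burst 55 ecn adaptive bandwidth 10mbit
 *
 * which satisfies the constraint above: limit comfortably exceeds qth_max
 * plus the expected burst.
 */
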
struct red_sched_data {
        u32                     limit;          /* HARD maximal queue length */

        unsigned char           flags;
        /* Non-flags in tc_red_qopt.flags. */
        unsigned char           userbits;

        struct timer_list       adapt_timer;
        struct Qdisc            *sch;
        struct red_parms        parms;
        struct red_vars         vars;
        struct red_stats        stats;
        struct Qdisc            *qdisc;
        struct tcf_qevent       qe_early_drop;
        struct tcf_qevent       qe_mark;
};

#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)

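/* Flag helpers: ECN marks packets instead of dropping them at the
 * probabilistic threshold, harddrop forces a drop once qavg exceeds qth_max
 * even with ECN enabled, and nodrop queues (rather than drops) non-ECT
 * packets that could not be marked.
 */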
static inline int red_use_ecn(struct red_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

static inline int red_use_harddrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

static int red_use_nodrop(struct red_sched_data *q)
{
        return q->flags & TC_RED_NODROP;
}

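/* Enqueue path: update the EWMA average queue size from the child qdisc's
 * backlog, let red_action() classify it (below min_th, between the
 * thresholds, or above max_th), then mark, drop, or queue the packet
 * accordingly and hand it to the child qdisc (a bfifo by default). A bound
 * qevent block may consume the skb on mark or early drop.
 */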
static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                       struct sk_buff **to_free)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;
        int ret;

        q->vars.qavg = red_calc_qavg(&q->parms,
                                     &q->vars,
                                     child->qstats.backlog);

        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
        case RED_DONT_MARK:
                break;

        case RED_PROB_MARK:
                qdisc_qstats_overlimit(sch);
                if (!red_use_ecn(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.prob_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.prob_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;

        case RED_HARD_MARK:
                qdisc_qstats_overlimit(sch);
                if (red_use_harddrop(q) || !red_use_ecn(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                if (INET_ECN_set_ce(skb)) {
                        q->stats.forced_mark++;
                        skb = tcf_qevent_handle(&q->qe_mark, sch, skb, to_free, &ret);
                        if (!skb)
                                return NET_XMIT_CN | ret;
                } else if (!red_use_nodrop(q)) {
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }

                /* Non-ECT packet in ECN nodrop mode: queue it. */
                break;
        }

        ret = qdisc_enqueue(skb, child, to_free);
        if (likely(ret == NET_XMIT_SUCCESS)) {
                qdisc_qstats_backlog_inc(sch, skb);
                sch->q.qlen++;
        } else if (net_xmit_drop_count(ret)) {
                q->stats.pdrop++;
                qdisc_qstats_drop(sch);
        }
        return ret;

congestion_drop:
        skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb, to_free, &ret);
        if (!skb)
                return NET_XMIT_CN | ret;

        qdisc_drop(skb, sch, to_free);
        return NET_XMIT_CN;
}

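/* Dequeue from the child qdisc; when the child runs empty, note the start of
 * an idle period so the average queue size decays correctly while the link
 * is idle.
 */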
static struct sk_buff *red_dequeue(struct Qdisc *sch)
{
        struct sk_buff *skb;
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        skb = child->dequeue(child);
        if (skb) {
                qdisc_bstats_update(sch, skb);
                qdisc_qstats_backlog_dec(sch, skb);
                sch->q.qlen--;
        } else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }
        return skb;
}

static struct sk_buff *red_peek(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct Qdisc *child = q->qdisc;

        return child->ops->peek(child);
}

static void red_reset(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        qdisc_reset(q->qdisc);
        sch->qstats.backlog = 0;
        sch->q.qlen = 0;
        red_restart(&q->vars);
}

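/* Push the current RED configuration down to the NIC driver through
 * ndo_setup_tc (TC_RED_REPLACE), or tear the offload down (TC_RED_DESTROY)
 * when offload is being disabled or the qdisc destroyed.
 */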
static int red_offload(struct Qdisc *sch, bool enable)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_qopt_offload opt = {
                .handle = sch->handle,
                .parent = sch->parent,
        };

        if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
                return -EOPNOTSUPP;

        if (enable) {
                opt.command = TC_RED_REPLACE;
                opt.set.min = q->parms.qth_min >> q->parms.Wlog;
                opt.set.max = q->parms.qth_max >> q->parms.Wlog;
                opt.set.probability = q->parms.max_P;
                opt.set.limit = q->limit;
                opt.set.is_ecn = red_use_ecn(q);
                opt.set.is_harddrop = red_use_harddrop(q);
                opt.set.is_nodrop = red_use_nodrop(q);
                opt.set.qstats = &sch->qstats;
        } else {
                opt.command = TC_RED_DESTROY;
        }

        return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
}

static void red_destroy(struct Qdisc *sch)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcf_qevent_destroy(&q->qe_mark, sch);
        tcf_qevent_destroy(&q->qe_early_drop, sch);
        del_timer_sync(&q->adapt_timer);
        red_offload(sch, false);
        qdisc_put(q->qdisc);
}

static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
        [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
        [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
        [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
        [TCA_RED_MAX_P] = { .type = NLA_U32 },
        [TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
        [TCA_RED_EARLY_DROP_BLOCK] = { .type = NLA_U32 },
        [TCA_RED_MARK_BLOCK] = { .type = NLA_U32 },
};

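/* Common configuration path for init and change: validate the TCA_RED_*
 * attributes, optionally create a bfifo child sized to the new limit, then
 * apply flags, parameters, and the adaptive timer under the qdisc tree lock
 * before (re)programming any hardware offload.
 */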
static int __red_change(struct Qdisc *sch, struct nlattr **tb,
                        struct netlink_ext_ack *extack)
{
        struct Qdisc *old_child = NULL, *child = NULL;
        struct red_sched_data *q = qdisc_priv(sch);
        struct nla_bitfield32 flags_bf;
        struct tc_red_qopt *ctl;
        unsigned char userbits;
        unsigned char flags;
        int err;
        u32 max_P;

        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_RED_PARMS]);
        if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
                return -EINVAL;

        err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
                            tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
                            &flags_bf, &userbits, extack);
        if (err)
                return err;

        if (ctl->limit > 0) {
                child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
                                         extack);
                if (IS_ERR(child))
                        return PTR_ERR(child);

                /* child is fifo, no need to check for noop_qdisc */
                qdisc_hash_add(child, true);
        }

        sch_tree_lock(sch);

        flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
        err = red_validate_flags(flags, extack);
        if (err)
                goto unlock_out;

        q->flags = flags;
        q->userbits = userbits;
        q->limit = ctl->limit;
        if (child) {
                qdisc_tree_flush_backlog(q->qdisc);
                old_child = q->qdisc;
                q->qdisc = child;
        }

        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_RED_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        del_timer(&q->adapt_timer);
        if (ctl->flags & TC_RED_ADAPTATIVE)
                mod_timer(&q->adapt_timer, jiffies + HZ/2);

        if (!q->qdisc->q.qlen)
                red_start_of_idle_period(&q->vars);

        sch_tree_unlock(sch);

        red_offload(sch, true);

        if (old_child)
                qdisc_put(old_child);
        return 0;

unlock_out:
        sch_tree_unlock(sch);
        if (child)
                qdisc_put(child);
        return err;
}

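/* Adaptive RED: every 500 ms (HZ/2) adjust max_P so the average queue size
 * tends to stay between the configured thresholds; runs under the root
 * qdisc lock.
 */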
static inline void red_adaptative_timer(struct timer_list *t)
{
        struct red_sched_data *q = from_timer(q, t, adapt_timer);
        struct Qdisc *sch = q->sch;
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        red_adaptative_algo(&q->parms, &q->vars);
        mod_timer(&q->adapt_timer, jiffies + HZ/2);
        spin_unlock(root_lock);
}

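/* Qdisc setup: parse the netlink options, apply them via __red_change(), and
 * bind the optional early_drop and mark qevent blocks.
 */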
static int red_init(struct Qdisc *sch, struct nlattr *opt,
                    struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        q->qdisc = &noop_qdisc;
        q->sch = sch;
        timer_setup(&q->adapt_timer, red_adaptative_timer, 0);

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = __red_change(sch, tb, extack);
        if (err)
                return err;

        err = tcf_qevent_init(&q->qe_early_drop, sch,
                              FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
                              tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        return tcf_qevent_init(&q->qe_mark, sch,
                               FLOW_BLOCK_BINDER_TYPE_RED_MARK,
                               tb[TCA_RED_MARK_BLOCK], extack);
}

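/* Runtime reconfiguration: the qevent block bindings may not be changed
 * after init, so validate them first, then reuse the common __red_change()
 * path.
 */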
static int red_change(struct Qdisc *sch, struct nlattr *opt,
                      struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_RED_MAX + 1];
        int err;

        if (!opt)
                return -EINVAL;

        err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
                                          extack);
        if (err < 0)
                return err;

        err = tcf_qevent_validate_change(&q->qe_early_drop,
                                         tb[TCA_RED_EARLY_DROP_BLOCK], extack);
        if (err)
                return err;

        err = tcf_qevent_validate_change(&q->qe_mark,
                                         tb[TCA_RED_MARK_BLOCK], extack);
        if (err)
                return err;

        return __red_change(sch, tb, extack);
}

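/* Pull basic and queue statistics back from an offloading driver so the
 * counters reported to userspace include packets handled in hardware.
 */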
static int red_dump_offload_stats(struct Qdisc *sch)
{
        struct tc_red_qopt_offload hw_stats = {
                .command = TC_RED_STATS,
                .handle = sch->handle,
                .parent = sch->parent,
                {
                        .stats.bstats = &sch->bstats,
                        .stats.qstats = &sch->qstats,
                },
        };

        return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
}

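/* Report the current configuration back to userspace as TCA_RED_*
 * attributes, converting the scaled thresholds back to bytes (the reverse of
 * the Wlog shift applied when they were set).
 */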
static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = (q->flags & TC_RED_HISTORIC_FLAGS) |
                                  q->userbits,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };
        int err;

        err = red_dump_offload_stats(sch);
        if (err)
                goto nla_put_failure;

        opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;
        if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
            nla_put_bitfield32(skb, TCA_RED_FLAGS,
                               q->flags, TC_RED_SUPPORTED_FLAGS) ||
            tcf_qevent_dump(skb, TCA_RED_MARK_BLOCK, &q->qe_mark) ||
            tcf_qevent_dump(skb, TCA_RED_EARLY_DROP_BLOCK, &q->qe_early_drop))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

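/* Extended statistics (early drops, tail drops, marks); when the qdisc is
 * offloaded, ask the driver to refresh q->stats first.
 */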
static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct red_sched_data *q = qdisc_priv(sch);
        struct net_device *dev = qdisc_dev(sch);
        struct tc_red_xstats st = {0};

        if (sch->flags & TCQ_F_OFFLOADED) {
                struct tc_red_qopt_offload hw_stats_request = {
                        .command = TC_RED_XSTATS,
                        .handle = sch->handle,
                        .parent = sch->parent,
                        {
                                .xstats = &q->stats,
                        },
                };
                dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
                                              &hw_stats_request);
        }
        st.early = q->stats.prob_drop + q->stats.forced_drop;
        st.pdrop = q->stats.pdrop;
        st.other = q->stats.other;
        st.marked = q->stats.prob_mark + q->stats.forced_mark;

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static int red_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        struct red_sched_data *q = qdisc_priv(sch);

        tcm->tcm_handle |= TC_H_MIN(1);
        tcm->tcm_info = q->qdisc->handle;
        return 0;
}

static void red_graft_offload(struct Qdisc *sch,
                              struct Qdisc *new, struct Qdisc *old,
                              struct netlink_ext_ack *extack)
{
        struct tc_red_qopt_offload graft_offload = {
                .handle         = sch->handle,
                .parent         = sch->parent,
                .child_handle   = new->handle,
                .command        = TC_RED_GRAFT,
        };

        qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
                                   TC_SETUP_QDISC_RED, &graft_offload, extack);
}

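/* Class operations: RED exposes exactly one child "class" (minor 1), so
 * graft replaces the single child qdisc, find always succeeds, and walk
 * visits a single entry.
 */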
static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
                     struct Qdisc **old, struct netlink_ext_ack *extack)
{
        struct red_sched_data *q = qdisc_priv(sch);

        if (new == NULL)
                new = &noop_qdisc;

        *old = qdisc_replace(sch, new, &q->qdisc);

        red_graft_offload(sch, new, *old, extack);
        return 0;
}

static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
{
        struct red_sched_data *q = qdisc_priv(sch);
        return q->qdisc;
}

static unsigned long red_find(struct Qdisc *sch, u32 classid)
{
        return 1;
}

static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
{
        if (!walker->stop) {
                if (walker->count >= walker->skip)
                        if (walker->fn(sch, 1, walker) < 0) {
                                walker->stop = 1;
                                return;
                        }
                walker->count++;
        }
}

static const struct Qdisc_class_ops red_class_ops = {
        .graft          =       red_graft,
        .leaf           =       red_leaf,
        .find           =       red_find,
        .walk           =       red_walk,
        .dump           =       red_dump_class,
};

static struct Qdisc_ops red_qdisc_ops __read_mostly = {
        .id             =       "red",
        .priv_size      =       sizeof(struct red_sched_data),
        .cl_ops         =       &red_class_ops,
        .enqueue        =       red_enqueue,
        .dequeue        =       red_dequeue,
        .peek           =       red_peek,
        .init           =       red_init,
        .reset          =       red_reset,
        .destroy        =       red_destroy,
        .change         =       red_change,
        .dump           =       red_dump,
        .dump_stats     =       red_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init red_module_init(void)
{
        return register_qdisc(&red_qdisc_ops);
}

static void __exit red_module_exit(void)
{
        unregister_qdisc(&red_qdisc_ops);
}

module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");
