TOMOYO Linux Cross Reference
Linux/net/sched/sch_cbq.c


  1 /*
  2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  *
 11  */
 12 
 13 #include <linux/module.h>
 14 #include <linux/slab.h>
 15 #include <linux/types.h>
 16 #include <linux/kernel.h>
 17 #include <linux/string.h>
 18 #include <linux/errno.h>
 19 #include <linux/skbuff.h>
 20 #include <net/netlink.h>
 21 #include <net/pkt_sched.h>
 22 
 23 
 24 /*      Class-Based Queueing (CBQ) algorithm.
 25         =======================================
 26 
 27         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
 28                  Management Models for Packet Networks",
 29                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
 30 
 31                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
 32 
 33                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
 34                  Parameters", 1996
 35 
 36                  [4] Sally Floyd and Michael Speer, "Experimental Results
 37                  for Class-Based Queueing", 1998, not published.
 38 
 39         -----------------------------------------------------------------------
 40 
 41         The algorithm skeleton was taken from the NS simulator's cbq.cc.
 42         If someone wants to check this code against the LBL version,
 43         he should take into account that ONLY the skeleton was borrowed;
 44         the implementation is different. Particularly:
 45 
 46         --- The WRR algorithm is different. Our version looks more
 47         reasonable (I hope) and works when quanta are allowed to be
 48         less than MTU, which is always the case when real time classes
 49         have small rates. Note that the statement of [3] is
 50         incomplete: delay may actually be estimated even if the class
 51         per-round allotment is less than MTU. Namely, if the per-round
 52         allotment is W*r_i, and r_1+...+r_k = r < 1
 53 
 54         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
 55 
 56         In the worst case we have IntServ estimate with D = W*r+k*MTU
 57         and C = MTU*r. The proof (if correct at all) is trivial.
 58 
 59 
 60         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
 61         interpret some places, which look like wrong translations
 62         from NS. Anyone is advised to find these differences
 63         and explain to me why I am wrong 8).
 64 
 65         --- Linux has no EOI event, so we cannot estimate true class
 66         idle time. The workaround is to consider the next dequeue event
 67         as a sign that the previous packet is finished. This is wrong because of
 68         internal device queueing, but on a permanently loaded link it is true.
 69         Moreover, combined with the clock integrator, this scheme looks
 70         very close to an ideal solution.  */
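
/*      A rough numeric illustration of the bound above, with assumed
        values: take MTU = 1500 bytes, k = 4 classes with r_i = 0.1 each
        (so r = 0.4) and a per-round scale W = 3000 bytes. The per-round
        allotment W*r_i is then 300 bytes, [MTU/(W*r_i)] = 5, and

        delay_i <= (5*1200 + 1200 + 4*1500)/B = 13200/B,

        i.e. roughly 10.6 ms on a 10 Mbit/s link, even though every class's
        per-round allotment is well below the MTU.  */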
 71 
 72 struct cbq_sched_data;
 73 
 74 
 75 struct cbq_class {
 76         struct Qdisc_class_common common;
 77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
 78 
 79 /* Parameters */
 80         unsigned char           priority;       /* class priority */
 81         unsigned char           priority2;      /* priority to be used after overlimit */
 82         unsigned char           ewma_log;       /* time constant for idle time calculation */
 83         unsigned char           ovl_strategy;
 84 #ifdef CONFIG_NET_CLS_ACT
 85         unsigned char           police;
 86 #endif
 87 
 88         u32                     defmap;
 89 
 90         /* Link-sharing scheduler parameters */
 91         long                    maxidle;        /* Class parameters: see below. */
 92         long                    offtime;
 93         long                    minidle;
 94         u32                     avpkt;
 95         struct qdisc_rate_table *R_tab;
 96 
 97         /* Overlimit strategy parameters */
 98         void                    (*overlimit)(struct cbq_class *cl);
 99         psched_tdiff_t          penalty;
100 
101         /* General scheduler (WRR) parameters */
102         long                    allot;
103         long                    quantum;        /* Allotment per WRR round */
104         long                    weight;         /* Relative allotment: see below */
105 
106         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
107         struct cbq_class        *split;         /* Ptr to split node */
108         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
109         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
110         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
111                                                    parent otherwise */
112         struct cbq_class        *sibling;       /* Sibling chain */
113         struct cbq_class        *children;      /* Pointer to children chain */
114 
115         struct Qdisc            *q;             /* Elementary queueing discipline */
116 
117 
118 /* Variables */
119         unsigned char           cpriority;      /* Effective priority */
120         unsigned char           delayed;
121         unsigned char           level;          /* level of the class in hierarchy:
122                                                    0 for leaf classes, and maximal
123                                                    level of children + 1 for nodes.
124                                                  */
125 
126         psched_time_t           last;           /* Last end of service */
127         psched_time_t           undertime;
128         long                    avgidle;
129         long                    deficit;        /* Saved deficit for WRR */
130         psched_time_t           penalized;
131         struct gnet_stats_basic_packed bstats;
132         struct gnet_stats_queue qstats;
133         struct gnet_stats_rate_est64 rate_est;
134         struct tc_cbq_xstats    xstats;
135 
136         struct tcf_proto __rcu  *filter_list;
137 
138         int                     refcnt;
139         int                     filters;
140 
141         struct cbq_class        *defaults[TC_PRIO_MAX + 1];
142 };
143 
144 struct cbq_sched_data {
145         struct Qdisc_class_hash clhash;                 /* Hash table of all classes */
146         int                     nclasses[TC_CBQ_MAXPRIO + 1];
147         unsigned int            quanta[TC_CBQ_MAXPRIO + 1];
148 
149         struct cbq_class        link;
150 
151         unsigned int            activemask;
152         struct cbq_class        *active[TC_CBQ_MAXPRIO + 1];    /* List of all classes
153                                                                    with backlog */
154 
155 #ifdef CONFIG_NET_CLS_ACT
156         struct cbq_class        *rx_class;
157 #endif
158         struct cbq_class        *tx_class;
159         struct cbq_class        *tx_borrowed;
160         int                     tx_len;
161         psched_time_t           now;            /* Cached timestamp */
162         unsigned int            pmask;
163 
164         struct hrtimer          delay_timer;
165         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
166                                                    started when CBQ has
167                                                    backlog, but cannot
168                                                    transmit just now */
169         psched_tdiff_t          wd_expires;
170         int                     toplevel;
171         u32                     hgenerator;
172 };
173 
174 
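/* L2T ("length to time"): look up in the class's rate table the time,
 * in psched ticks, needed to transmit a len-byte packet at the class rate.
 */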
175 #define L2T(cl, len)    qdisc_l2t((cl)->R_tab, len)
176 
177 static inline struct cbq_class *
178 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
179 {
180         struct Qdisc_class_common *clc;
181 
182         clc = qdisc_class_find(&q->clhash, classid);
183         if (clc == NULL)
184                 return NULL;
185         return container_of(clc, struct cbq_class, common);
186 }
187 
188 #ifdef CONFIG_NET_CLS_ACT
189 
190 static struct cbq_class *
191 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
192 {
193         struct cbq_class *cl;
194 
195         for (cl = this->tparent; cl; cl = cl->tparent) {
196                 struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
197 
198                 if (new != NULL && new != this)
199                         return new;
200         }
201         return NULL;
202 }
203 
204 #endif
205 
206 /* Classify packet. The procedure is pretty complicated, but
207  * it allows us to combine link sharing and priority scheduling
208  * transparently.
209  *
210  * Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
211  * so that it resolves to split nodes. Then packets are classified
212  * by logical priority, or a more specific classifier may be attached
213  * to the split node.
214  */
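
/* For illustration: with sch->handle == 0x00010000 (qdisc "1:"), a packet
 * sent with skb->priority == 0x00010005 satisfies
 * TC_H_MAJ(prio ^ sch->handle) == 0, so Step 1 below resolves it directly
 * to class 1:5 without consulting any filter.
 */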
215 
216 static struct cbq_class *
217 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
218 {
219         struct cbq_sched_data *q = qdisc_priv(sch);
220         struct cbq_class *head = &q->link;
221         struct cbq_class **defmap;
222         struct cbq_class *cl = NULL;
223         u32 prio = skb->priority;
224         struct tcf_proto *fl;
225         struct tcf_result res;
226 
227         /*
228          *  Step 1. If skb->priority points to one of our classes, use it.
229          */
230         if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
231             (cl = cbq_class_lookup(q, prio)) != NULL)
232                 return cl;
233 
234         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
235         for (;;) {
236                 int result = 0;
237                 defmap = head->defaults;
238 
239                 fl = rcu_dereference_bh(head->filter_list);
240                 /*
241                  * Step 2+n. Apply classifier.
242                  */
243                 result = tc_classify(skb, fl, &res, true);
244                 if (!fl || result < 0)
245                         goto fallback;
246 
247                 cl = (void *)res.class;
248                 if (!cl) {
249                         if (TC_H_MAJ(res.classid))
250                                 cl = cbq_class_lookup(q, res.classid);
251                         else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
252                                 cl = defmap[TC_PRIO_BESTEFFORT];
253 
254                         if (cl == NULL)
255                                 goto fallback;
256                 }
257                 if (cl->level >= head->level)
258                         goto fallback;
259 #ifdef CONFIG_NET_CLS_ACT
260                 switch (result) {
261                 case TC_ACT_QUEUED:
262                 case TC_ACT_STOLEN:
263                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
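                        /* fall through: as for TC_ACT_SHOT no class is
                         * returned, but *qerr set above tells the caller
                         * not to count this as a real drop.
                         */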
264                 case TC_ACT_SHOT:
265                         return NULL;
266                 case TC_ACT_RECLASSIFY:
267                         return cbq_reclassify(skb, cl);
268                 }
269 #endif
270                 if (cl->level == 0)
271                         return cl;
272 
273                 /*
274                  * Step 3+n. If classifier selected a link sharing class,
275                  *         apply the agency-specific classifier.
276                  *         Repeat this procedure until we hit a leaf node.
277                  */
278                 head = cl;
279         }
280 
281 fallback:
282         cl = head;
283 
284         /*
285          * Step 4. No success...
286          */
287         if (TC_H_MAJ(prio) == 0 &&
288             !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
289             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
290                 return head;
291 
292         return cl;
293 }
294 
295 /*
296  * A packet has just been enqueued on an empty class.
297  * cbq_activate_class adds the class to the tail of the active class list
298  * of its priority band.
299  */
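/* Note: q->active[prio] points at the most recently added (tail) class of
 * a circular singly linked list, so q->active[prio]->next_alive is the
 * head of the round-robin order.
 */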
300 
301 static inline void cbq_activate_class(struct cbq_class *cl)
302 {
303         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
304         int prio = cl->cpriority;
305         struct cbq_class *cl_tail;
306 
307         cl_tail = q->active[prio];
308         q->active[prio] = cl;
309 
310         if (cl_tail != NULL) {
311                 cl->next_alive = cl_tail->next_alive;
312                 cl_tail->next_alive = cl;
313         } else {
314                 cl->next_alive = cl;
315                 q->activemask |= (1<<prio);
316         }
317 }
318 
319 /*
320  * Unlink the class from the active chain.
321  * Note that the same unlinking is done directly in cbq_dequeue*
322  * during the round-robin procedure.
323  */
324 
325 static void cbq_deactivate_class(struct cbq_class *this)
326 {
327         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
328         int prio = this->cpriority;
329         struct cbq_class *cl;
330         struct cbq_class *cl_prev = q->active[prio];
331 
332         do {
333                 cl = cl_prev->next_alive;
334                 if (cl == this) {
335                         cl_prev->next_alive = cl->next_alive;
336                         cl->next_alive = NULL;
337 
338                         if (cl == q->active[prio]) {
339                                 q->active[prio] = cl_prev;
340                                 if (cl == q->active[prio]) {
341                                         q->active[prio] = NULL;
342                                         q->activemask &= ~(1<<prio);
343                                         return;
344                                 }
345                         }
346                         return;
347                 }
348         } while ((cl_prev = cl) != q->active[prio]);
349 }
350 
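/* q->toplevel implements the "top level" link-sharing guideline of [1]:
 * a class may borrow only from ancestors whose level does not exceed
 * q->toplevel, and TC_CBQ_MAXLEVEL means no restriction.  cbq_mark_toplevel
 * lowers the cutoff when a packet arrives for a class that (itself or via
 * its borrow chain) is currently underlimit.
 */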
351 static void
352 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
353 {
354         int toplevel = q->toplevel;
355 
356         if (toplevel > cl->level && !(qdisc_is_throttled(cl->q))) {
357                 psched_time_t now = psched_get_time();
358 
359                 do {
360                         if (cl->undertime < now) {
361                                 q->toplevel = cl->level;
362                                 return;
363                         }
364                 } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
365         }
366 }
367 
368 static int
369 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
370 {
371         struct cbq_sched_data *q = qdisc_priv(sch);
372         int uninitialized_var(ret);
373         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
374 
375 #ifdef CONFIG_NET_CLS_ACT
376         q->rx_class = cl;
377 #endif
378         if (cl == NULL) {
379                 if (ret & __NET_XMIT_BYPASS)
380                         qdisc_qstats_drop(sch);
381                 kfree_skb(skb);
382                 return ret;
383         }
384 
385 #ifdef CONFIG_NET_CLS_ACT
386         cl->q->__parent = sch;
387 #endif
388         ret = qdisc_enqueue(skb, cl->q);
389         if (ret == NET_XMIT_SUCCESS) {
390                 sch->q.qlen++;
391                 cbq_mark_toplevel(q, cl);
392                 if (!cl->next_alive)
393                         cbq_activate_class(cl);
394                 return ret;
395         }
396 
397         if (net_xmit_drop_count(ret)) {
398                 qdisc_qstats_drop(sch);
399                 cbq_mark_toplevel(q, cl);
400                 cl->qstats.drops++;
401         }
402         return ret;
403 }
404 
405 /* Overlimit actions */
406 
407 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
408 
409 static void cbq_ovl_classic(struct cbq_class *cl)
410 {
411         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
412         psched_tdiff_t delay = cl->undertime - q->now;
413 
414         if (!cl->delayed) {
415                 delay += cl->offtime;
416 
417                 /*
418                  * Class goes to sleep, so that it will have no
419                  * chance to work avgidle. Let's forgive it 8)
420                  *
421                  * BTW cbq-2.0 has a bug in this
422                  * place: apparently they forgot to shift it by cl->ewma_log.
423                  */
424                 if (cl->avgidle < 0)
425                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
426                 if (cl->avgidle < cl->minidle)
427                         cl->avgidle = cl->minidle;
428                 if (delay <= 0)
429                         delay = 1;
430                 cl->undertime = q->now + delay;
431 
432                 cl->xstats.overactions++;
433                 cl->delayed = 1;
434         }
435         if (q->wd_expires == 0 || q->wd_expires > delay)
436                 q->wd_expires = delay;
437 
438         /* Dirty work! We must schedule wakeups based on
439          * real available rate, rather than leaf rate,
440          * which may be tiny (even zero).
441          */
442         if (q->toplevel == TC_CBQ_MAXLEVEL) {
443                 struct cbq_class *b;
444                 psched_tdiff_t base_delay = q->wd_expires;
445 
446                 for (b = cl->borrow; b; b = b->borrow) {
447                         delay = b->undertime - q->now;
448                         if (delay < base_delay) {
449                                 if (delay <= 0)
450                                         delay = 1;
451                                 base_delay = delay;
452                         }
453                 }
454 
455                 q->wd_expires = base_delay;
456         }
457 }
458 
459 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
460  * they go overlimit
461  */
462 
463 static void cbq_ovl_rclassic(struct cbq_class *cl)
464 {
465         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
466         struct cbq_class *this = cl;
467 
468         do {
469                 if (cl->level > q->toplevel) {
470                         cl = NULL;
471                         break;
472                 }
473         } while ((cl = cl->borrow) != NULL);
474 
475         if (cl == NULL)
476                 cl = this;
477         cbq_ovl_classic(cl);
478 }
479 
480 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
481 
482 static void cbq_ovl_delay(struct cbq_class *cl)
483 {
484         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
485         psched_tdiff_t delay = cl->undertime - q->now;
486 
487         if (test_bit(__QDISC_STATE_DEACTIVATED,
488                      &qdisc_root_sleeping(cl->qdisc)->state))
489                 return;
490 
491         if (!cl->delayed) {
492                 psched_time_t sched = q->now;
493                 ktime_t expires;
494 
495                 delay += cl->offtime;
496                 if (cl->avgidle < 0)
497                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
498                 if (cl->avgidle < cl->minidle)
499                         cl->avgidle = cl->minidle;
500                 cl->undertime = q->now + delay;
501 
502                 if (delay > 0) {
503                         sched += delay + cl->penalty;
504                         cl->penalized = sched;
505                         cl->cpriority = TC_CBQ_MAXPRIO;
506                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
507 
508                         expires = ns_to_ktime(PSCHED_TICKS2NS(sched));
509                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
510                             ktime_to_ns(ktime_sub(
511                                         hrtimer_get_expires(&q->delay_timer),
512                                         expires)) > 0)
513                                 hrtimer_set_expires(&q->delay_timer, expires);
514                         hrtimer_restart(&q->delay_timer);
515                         cl->delayed = 1;
516                         cl->xstats.overactions++;
517                         return;
518                 }
519                 delay = 1;
520         }
521         if (q->wd_expires == 0 || q->wd_expires > delay)
522                 q->wd_expires = delay;
523 }
524 
525 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
526 
527 static void cbq_ovl_lowprio(struct cbq_class *cl)
528 {
529         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
530 
531         cl->penalized = q->now + cl->penalty;
532 
533         if (cl->cpriority != cl->priority2) {
534                 cl->cpriority = cl->priority2;
535                 q->pmask |= (1<<cl->cpriority);
536                 cl->xstats.overactions++;
537         }
538         cbq_ovl_classic(cl);
539 }
540 
541 /* TC_CBQ_OVL_DROP: penalize class by dropping */
542 
543 static void cbq_ovl_drop(struct cbq_class *cl)
544 {
545         if (cl->q->ops->drop)
546                 if (cl->q->ops->drop(cl->q))
547                         cl->qdisc->q.qlen--;
548         cl->xstats.overactions++;
549         cbq_ovl_classic(cl);
550 }
551 
552 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
553                                        psched_time_t now)
554 {
555         struct cbq_class *cl;
556         struct cbq_class *cl_prev = q->active[prio];
557         psched_time_t sched = now;
558 
559         if (cl_prev == NULL)
560                 return 0;
561 
562         do {
563                 cl = cl_prev->next_alive;
564                 if (now - cl->penalized > 0) {
565                         cl_prev->next_alive = cl->next_alive;
566                         cl->next_alive = NULL;
567                         cl->cpriority = cl->priority;
568                         cl->delayed = 0;
569                         cbq_activate_class(cl);
570 
571                         if (cl == q->active[prio]) {
572                                 q->active[prio] = cl_prev;
573                                 if (cl == q->active[prio]) {
574                                         q->active[prio] = NULL;
575                                         return 0;
576                                 }
577                         }
578 
579                         cl = cl_prev->next_alive;
580                 } else if (sched - cl->penalized > 0)
581                         sched = cl->penalized;
582         } while ((cl_prev = cl) != q->active[prio]);
583 
584         return sched - now;
585 }
586 
587 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
588 {
589         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
590                                                 delay_timer);
591         struct Qdisc *sch = q->watchdog.qdisc;
592         psched_time_t now;
593         psched_tdiff_t delay = 0;
594         unsigned int pmask;
595 
596         now = psched_get_time();
597 
598         pmask = q->pmask;
599         q->pmask = 0;
600 
601         while (pmask) {
602                 int prio = ffz(~pmask);
603                 psched_tdiff_t tmp;
604 
605                 pmask &= ~(1<<prio);
606 
607                 tmp = cbq_undelay_prio(q, prio, now);
608                 if (tmp > 0) {
609                         q->pmask |= 1<<prio;
610                         if (tmp < delay || delay == 0)
611                                 delay = tmp;
612                 }
613         }
614 
615         if (delay) {
616                 ktime_t time;
617 
618                 time = ktime_set(0, 0);
619                 time = ktime_add_ns(time, PSCHED_TICKS2NS(now + delay));
620                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS_PINNED);
621         }
622 
623         qdisc_unthrottled(sch);
624         __netif_schedule(qdisc_root(sch));
625         return HRTIMER_NORESTART;
626 }
627 
628 #ifdef CONFIG_NET_CLS_ACT
629 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
630 {
631         struct Qdisc *sch = child->__parent;
632         struct cbq_sched_data *q = qdisc_priv(sch);
633         struct cbq_class *cl = q->rx_class;
634 
635         q->rx_class = NULL;
636 
637         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
638                 int ret;
639 
640                 cbq_mark_toplevel(q, cl);
641 
642                 q->rx_class = cl;
643                 cl->q->__parent = sch;
644 
645                 ret = qdisc_enqueue(skb, cl->q);
646                 if (ret == NET_XMIT_SUCCESS) {
647                         sch->q.qlen++;
648                         if (!cl->next_alive)
649                                 cbq_activate_class(cl);
650                         return 0;
651                 }
652                 if (net_xmit_drop_count(ret))
653                         qdisc_qstats_drop(sch);
654                 return 0;
655         }
656 
657         qdisc_qstats_drop(sch);
658         return -1;
659 }
660 #endif
661 
662 /*
663  * This is a mission-critical procedure.
664  *
665  * We "regenerate" the toplevel cutoff if the transmitting class
666  * has backlog and is not regulated. This is not part of the
667  * original CBQ description, but it looks more reasonable.
668  * Probably, it is wrong. This question needs further investigation.
669  */
670 
671 static inline void
672 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
673                     struct cbq_class *borrowed)
674 {
675         if (cl && q->toplevel >= borrowed->level) {
676                 if (cl->q->q.qlen > 1) {
677                         do {
678                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
679                                         q->toplevel = borrowed->level;
680                                         return;
681                                 }
682                         } while ((borrowed = borrowed->borrow) != NULL);
683                 }
684 #if 0
685         /* It is not necessary now. Uncommenting it
686            will save CPU cycles, but decrease fairness.
687          */
688                 q->toplevel = TC_CBQ_MAXLEVEL;
689 #endif
690         }
691 }
692 
693 static void
694 cbq_update(struct cbq_sched_data *q)
695 {
696         struct cbq_class *this = q->tx_class;
697         struct cbq_class *cl = this;
698         int len = q->tx_len;
699         psched_time_t now;
700 
701         q->tx_class = NULL;
702         /* Time integrator. We calculate the EOS (end of service) time
703          * by adding the expected packet transmission time.
704          */
705         now = q->now + L2T(&q->link, len);
706 
707         for ( ; cl; cl = cl->share) {
708                 long avgidle = cl->avgidle;
709                 long idle;
710 
711                 cl->bstats.packets++;
712                 cl->bstats.bytes += len;
713 
714                 /*
715                  * (now - last) is total time between packet right edges.
716                  * (last_pktlen/rate) is "virtual" busy time, so that
717                  *
718                  *      idle = (now - last) - last_pktlen/rate
719                  */
720 
721                 idle = now - cl->last;
722                 if ((unsigned long)idle > 128*1024*1024) {
723                         avgidle = cl->maxidle;
724                 } else {
725                         idle -= L2T(cl, len);
726 
727                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
728                  * where W=2^{-ewma_log}. But cl->avgidle is scaled:
729                  * cl->avgidle == true_avgidle/W, so dividing the update by W:
730                  * avgidle := avgidle + idle - (avgidle >> ewma_log), hence:
731                  */
732                         avgidle += idle - (avgidle>>cl->ewma_log);
733                 }
734 
735                 if (avgidle <= 0) {
736                         /* Overlimit or at-limit */
737 
738                         if (avgidle < cl->minidle)
739                                 avgidle = cl->minidle;
740 
741                         cl->avgidle = avgidle;
742 
743                         /* Calculate the expected time when this class
744                          * will be allowed to send.
745                          * It will occur when:
746                          * (1-W)*true_avgidle + W*idle = 0, i.e.
747                          * idle = (1/W - 1)*(-true_avgidle)
748                          * or
749                          * idle = (1 - W)*(-cl->avgidle);
750                          */
751                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
752 
753                         /*
754                          * That is not all.
755                          * To maintain the rate allocated to the class,
756                          * we add to undertime the virtual clock time
757                          * necessary to complete the transmitted packet.
758                          * (len/phys_bandwidth has already elapsed by
759                          * the moment cbq_update runs.)
760                          */
761 
762                         idle -= L2T(&q->link, len);
763                         idle += L2T(cl, len);
764 
765                         cl->undertime = now + idle;
766                 } else {
767                         /* Underlimit */
768 
769                         cl->undertime = PSCHED_PASTPERFECT;
770                         if (avgidle > cl->maxidle)
771                                 cl->avgidle = cl->maxidle;
772                         else
773                                 cl->avgidle = avgidle;
774                 }
775                 if ((s64)(now - cl->last) > 0)
776                         cl->last = now;
777         }
778 
779         cbq_update_toplevel(q, this, q->tx_borrowed);
780 }
781 
782 static inline struct cbq_class *
783 cbq_under_limit(struct cbq_class *cl)
784 {
785         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
786         struct cbq_class *this_cl = cl;
787 
788         if (cl->tparent == NULL)
789                 return cl;
790 
791         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
792                 cl->delayed = 0;
793                 return cl;
794         }
795 
796         do {
797                 /* This is a very suspicious place. Currently the
798                  * overlimit action is generated for non-bounded classes
799                  * only if the link is completely congested.
800                  * Though this agrees with the ancestor-only paradigm,
801                  * it looks very stupid. Particularly,
802                  * it means that this chunk of code will either
803                  * never be called or result in strong amplification
804                  * of burstiness. Dangerous and silly, but
805                  * no other solution exists.
806                  */
807                 cl = cl->borrow;
808                 if (!cl) {
809                         this_cl->qstats.overlimits++;
810                         this_cl->overlimit(this_cl);
811                         return NULL;
812                 }
813                 if (cl->level > q->toplevel)
814                         return NULL;
815         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
816 
817         cl->delayed = 0;
818         return cl;
819 }
820 
821 static inline struct sk_buff *
822 cbq_dequeue_prio(struct Qdisc *sch, int prio)
823 {
824         struct cbq_sched_data *q = qdisc_priv(sch);
825         struct cbq_class *cl_tail, *cl_prev, *cl;
826         struct sk_buff *skb;
827         int deficit;
828 
829         cl_tail = cl_prev = q->active[prio];
830         cl = cl_prev->next_alive;
831 
832         do {
833                 deficit = 0;
834 
835                 /* Start round */
836                 do {
837                         struct cbq_class *borrow = cl;
838 
839                         if (cl->q->q.qlen &&
840                             (borrow = cbq_under_limit(cl)) == NULL)
841                                 goto skip_class;
842 
843                         if (cl->deficit <= 0) {
844                                 /* Class exhausted its allotment per
845                                  * this round. Switch to the next one.
846                                  */
847                                 deficit = 1;
848                                 cl->deficit += cl->quantum;
849                                 goto next_class;
850                         }
851 
852                         skb = cl->q->dequeue(cl->q);
853 
854                         /* Class did not give us any skb :-(
855                          * It could occur even if cl->q->q.qlen != 0
856                          * e.g. if cl->q == "tbf"
857                          */
858                         if (skb == NULL)
859                                 goto skip_class;
860 
861                         cl->deficit -= qdisc_pkt_len(skb);
862                         q->tx_class = cl;
863                         q->tx_borrowed = borrow;
864                         if (borrow != cl) {
865 #ifndef CBQ_XSTATS_BORROWS_BYTES
866                                 borrow->xstats.borrows++;
867                                 cl->xstats.borrows++;
868 #else
869                                 borrow->xstats.borrows += qdisc_pkt_len(skb);
870                                 cl->xstats.borrows += qdisc_pkt_len(skb);
871 #endif
872                         }
873                         q->tx_len = qdisc_pkt_len(skb);
874 
875                         if (cl->deficit <= 0) {
876                                 q->active[prio] = cl;
877                                 cl = cl->next_alive;
878                                 cl->deficit += cl->quantum;
879                         }
880                         return skb;
881 
882 skip_class:
883                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
884                                 /* Class is empty or penalized.
885                                  * Unlink it from active chain.
886                                  */
887                                 cl_prev->next_alive = cl->next_alive;
888                                 cl->next_alive = NULL;
889 
890                                 /* Did cl_tail point to it? */
891                                 if (cl == cl_tail) {
892                                         /* Repair it! */
893                                         cl_tail = cl_prev;
894 
895                                         /* Was it the last class in this band? */
896                                         if (cl == cl_tail) {
897                                                 /* Kill the band! */
898                                                 q->active[prio] = NULL;
899                                                 q->activemask &= ~(1<<prio);
900                                                 if (cl->q->q.qlen)
901                                                         cbq_activate_class(cl);
902                                                 return NULL;
903                                         }
904 
905                                         q->active[prio] = cl_tail;
906                                 }
907                                 if (cl->q->q.qlen)
908                                         cbq_activate_class(cl);
909 
910                                 cl = cl_prev;
911                         }
912 
913 next_class:
914                         cl_prev = cl;
915                         cl = cl->next_alive;
916                 } while (cl_prev != cl_tail);
917         } while (deficit);
918 
919         q->active[prio] = cl_prev;
920 
921         return NULL;
922 }
923 
924 static inline struct sk_buff *
925 cbq_dequeue_1(struct Qdisc *sch)
926 {
927         struct cbq_sched_data *q = qdisc_priv(sch);
928         struct sk_buff *skb;
929         unsigned int activemask;
930 
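        /* Each set bit in activemask marks a priority band with backlog.
         * ffz(~mask) yields the lowest set bit, so band 0 (the highest
         * priority) is tried first.  The "& 0xFF" also hides the extra
         * band TC_CBQ_MAXPRIO (bit 8), where classes penalized by
         * cbq_ovl_delay are parked until cbq_undelay releases them.
         */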
931         activemask = q->activemask & 0xFF;
932         while (activemask) {
933                 int prio = ffz(~activemask);
934                 activemask &= ~(1<<prio);
935                 skb = cbq_dequeue_prio(sch, prio);
936                 if (skb)
937                         return skb;
938         }
939         return NULL;
940 }
941 
942 static struct sk_buff *
943 cbq_dequeue(struct Qdisc *sch)
944 {
945         struct sk_buff *skb;
946         struct cbq_sched_data *q = qdisc_priv(sch);
947         psched_time_t now;
948 
949         now = psched_get_time();
950 
951         if (q->tx_class)
952                 cbq_update(q);
953 
954         q->now = now;
955 
956         for (;;) {
957                 q->wd_expires = 0;
958 
959                 skb = cbq_dequeue_1(sch);
960                 if (skb) {
961                         qdisc_bstats_update(sch, skb);
962                         sch->q.qlen--;
963                         qdisc_unthrottled(sch);
964                         return skb;
965                 }
966 
967                 /* All the classes are overlimit.
968                  *
969                  * This is possible if:
970                  *
971                  * 1. Scheduler is empty.
972                  * 2. Toplevel cutoff inhibited borrowing.
973                  * 3. Root class is overlimit.
974                  *
975                  * Reset the 2nd and 3rd conditions and retry.
976                  *
977                  * Note that NS and cbq-2.0 are buggy here: peeking
978                  * an arbitrary class is appropriate for ancestor-only
979                  * sharing, but not for the top-level algorithm.
980                  *
981                  * Our version is better but slower: it requires
982                  * two passes, which is unavoidable with top-level sharing.
983                  */
984 
985                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
986                     q->link.undertime == PSCHED_PASTPERFECT)
987                         break;
988 
989                 q->toplevel = TC_CBQ_MAXLEVEL;
990                 q->link.undertime = PSCHED_PASTPERFECT;
991         }
992 
993         /* No packets in scheduler or nobody wants to give them to us :-(
994          * Sigh... start the watchdog timer in the latter case.
995          */
996 
997         if (sch->q.qlen) {
998                 qdisc_qstats_overlimit(sch);
999                 if (q->wd_expires)
1000                         qdisc_watchdog_schedule(&q->watchdog,
1001                                                 now + q->wd_expires);
1002         }
1003         return NULL;
1004 }
1005 
1006 /* CBQ class maintenance routines */
1007 
1008 static void cbq_adjust_levels(struct cbq_class *this)
1009 {
1010         if (this == NULL)
1011                 return;
1012 
1013         do {
1014                 int level = 0;
1015                 struct cbq_class *cl;
1016 
1017                 cl = this->children;
1018                 if (cl) {
1019                         do {
1020                                 if (cl->level > level)
1021                                         level = cl->level;
1022                         } while ((cl = cl->sibling) != this->children);
1023                 }
1024                 this->level = level + 1;
1025         } while ((this = this->tparent) != NULL);
1026 }
1027 
1028 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1029 {
1030         struct cbq_class *cl;
1031         unsigned int h;
1032 
1033         if (q->quanta[prio] == 0)
1034                 return;
1035 
1036         for (h = 0; h < q->clhash.hashsize; h++) {
1037                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1038                         /* BUGGGG... Beware! This expression suffers from
1039                          * arithmetic overflows!
1040                          */
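                        /* quanta[prio] is the sum of the weights of every
                         * class in this band, so the assignment below scales
                         * allot by the class's weight relative to the band's
                         * average weight:
                         *   quantum = allot * weight / (quanta/nclasses)
                         */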
1041                         if (cl->priority == prio) {
1042                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1043                                         q->quanta[prio];
1044                         }
1045                         if (cl->quantum <= 0 ||
1046                             cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
1047                                 pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
1048                                         cl->common.classid, cl->quantum);
1049                                 cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
1050                         }
1051                 }
1052         }
1053 }
1054 
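/* defmap/defaults: bit i of cl->defmap set means cl volunteers to be the
 * default destination for packets of logical priority i under its split
 * node.  cbq_sync_defmap() rebuilds split->defaults[] from these bits,
 * preferring the lowest-level (most specific) volunteering class.
 */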
1055 static void cbq_sync_defmap(struct cbq_class *cl)
1056 {
1057         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1058         struct cbq_class *split = cl->split;
1059         unsigned int h;
1060         int i;
1061 
1062         if (split == NULL)
1063                 return;
1064 
1065         for (i = 0; i <= TC_PRIO_MAX; i++) {
1066                 if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
1067                         split->defaults[i] = NULL;
1068         }
1069 
1070         for (i = 0; i <= TC_PRIO_MAX; i++) {
1071                 int level = split->level;
1072 
1073                 if (split->defaults[i])
1074                         continue;
1075 
1076                 for (h = 0; h < q->clhash.hashsize; h++) {
1077                         struct cbq_class *c;
1078 
1079                         hlist_for_each_entry(c, &q->clhash.hash[h],
1080                                              common.hnode) {
1081                                 if (c->split == split && c->level < level &&
1082                                     c->defmap & (1<<i)) {
1083                                         split->defaults[i] = c;
1084                                         level = c->level;
1085                                 }
1086                         }
1087                 }
1088         }
1089 }
1090 
1091 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1092 {
1093         struct cbq_class *split = NULL;
1094 
1095         if (splitid == 0) {
1096                 split = cl->split;
1097                 if (!split)
1098                         return;
1099                 splitid = split->common.classid;
1100         }
1101 
1102         if (split == NULL || split->common.classid != splitid) {
1103                 for (split = cl->tparent; split; split = split->tparent)
1104                         if (split->common.classid == splitid)
1105                                 break;
1106         }
1107 
1108         if (split == NULL)
1109                 return;
1110 
1111         if (cl->split != split) {
1112                 cl->defmap = 0;
1113                 cbq_sync_defmap(cl);
1114                 cl->split = split;
1115                 cl->defmap = def & mask;
1116         } else
1117                 cl->defmap = (cl->defmap & ~mask) | (def & mask);
1118 
1119         cbq_sync_defmap(cl);
1120 }
1121 
1122 static void cbq_unlink_class(struct cbq_class *this)
1123 {
1124         struct cbq_class *cl, **clp;
1125         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1126 
1127         qdisc_class_hash_remove(&q->clhash, &this->common);
1128 
1129         if (this->tparent) {
1130                 clp = &this->sibling;
1131                 cl = *clp;
1132                 do {
1133                         if (cl == this) {
1134                                 *clp = cl->sibling;
1135                                 break;
1136                         }
1137                         clp = &cl->sibling;
1138                 } while ((cl = *clp) != this->sibling);
1139 
1140                 if (this->tparent->children == this) {
1141                         this->tparent->children = this->sibling;
1142                         if (this->sibling == this)
1143                                 this->tparent->children = NULL;
1144                 }
1145         } else {
1146                 WARN_ON(this->sibling != this);
1147         }
1148 }
1149 
1150 static void cbq_link_class(struct cbq_class *this)
1151 {
1152         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1153         struct cbq_class *parent = this->tparent;
1154 
1155         this->sibling = this;
1156         qdisc_class_hash_insert(&q->clhash, &this->common);
1157 
1158         if (parent == NULL)
1159                 return;
1160 
1161         if (parent->children == NULL) {
1162                 parent->children = this;
1163         } else {
1164                 this->sibling = parent->children->sibling;
1165                 parent->children->sibling = this;
1166         }
1167 }
1168 
1169 static unsigned int cbq_drop(struct Qdisc *sch)
1170 {
1171         struct cbq_sched_data *q = qdisc_priv(sch);
1172         struct cbq_class *cl, *cl_head;
1173         int prio;
1174         unsigned int len;
1175 
1176         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1177                 cl_head = q->active[prio];
1178                 if (!cl_head)
1179                         continue;
1180 
1181                 cl = cl_head;
1182                 do {
1183                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1184                                 sch->q.qlen--;
1185                                 if (!cl->q->q.qlen)
1186                                         cbq_deactivate_class(cl);
1187                                 return len;
1188                         }
1189                 } while ((cl = cl->next_alive) != cl_head);
1190         }
1191         return 0;
1192 }
1193 
1194 static void
1195 cbq_reset(struct Qdisc *sch)
1196 {
1197         struct cbq_sched_data *q = qdisc_priv(sch);
1198         struct cbq_class *cl;
1199         int prio;
1200         unsigned int h;
1201 
1202         q->activemask = 0;
1203         q->pmask = 0;
1204         q->tx_class = NULL;
1205         q->tx_borrowed = NULL;
1206         qdisc_watchdog_cancel(&q->watchdog);
1207         hrtimer_cancel(&q->delay_timer);
1208         q->toplevel = TC_CBQ_MAXLEVEL;
1209         q->now = psched_get_time();
1210 
1211         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1212                 q->active[prio] = NULL;
1213 
1214         for (h = 0; h < q->clhash.hashsize; h++) {
1215                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
1216                         qdisc_reset(cl->q);
1217 
1218                         cl->next_alive = NULL;
1219                         cl->undertime = PSCHED_PASTPERFECT;
1220                         cl->avgidle = cl->maxidle;
1221                         cl->deficit = cl->quantum;
1222                         cl->cpriority = cl->priority;
1223                 }
1224         }
1225         sch->q.qlen = 0;
1226 }
1227 
1228 
1229 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1230 {
1231         if (lss->change & TCF_CBQ_LSS_FLAGS) {
1232                 cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1233                 cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1234         }
1235         if (lss->change & TCF_CBQ_LSS_EWMA)
1236                 cl->ewma_log = lss->ewma_log;
1237         if (lss->change & TCF_CBQ_LSS_AVPKT)
1238                 cl->avpkt = lss->avpkt;
1239         if (lss->change & TCF_CBQ_LSS_MINIDLE)
1240                 cl->minidle = -(long)lss->minidle;
1241         if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
1242                 cl->maxidle = lss->maxidle;
1243                 cl->avgidle = lss->maxidle;
1244         }
1245         if (lss->change & TCF_CBQ_LSS_OFFTIME)
1246                 cl->offtime = lss->offtime;
1247         return 0;
1248 }
1249 
1250 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1251 {
1252         q->nclasses[cl->priority]--;
1253         q->quanta[cl->priority] -= cl->weight;
1254         cbq_normalize_quanta(q, cl->priority);
1255 }
1256 
1257 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1258 {
1259         q->nclasses[cl->priority]++;
1260         q->quanta[cl->priority] += cl->weight;
1261         cbq_normalize_quanta(q, cl->priority);
1262 }
1263 
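/* Note: WRR priorities are 1-based on the netlink interface and 0-based
 * internally: cbq_set_wrr() subtracts one here and cbq_dump_wrr() adds it
 * back when reporting.
 */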
1264 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1265 {
1266         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1267 
1268         if (wrr->allot)
1269                 cl->allot = wrr->allot;
1270         if (wrr->weight)
1271                 cl->weight = wrr->weight;
1272         if (wrr->priority) {
1273                 cl->priority = wrr->priority - 1;
1274                 cl->cpriority = cl->priority;
1275                 if (cl->priority >= cl->priority2)
1276                         cl->priority2 = TC_CBQ_MAXPRIO - 1;
1277         }
1278 
1279         cbq_addprio(q, cl);
1280         return 0;
1281 }
1282 
1283 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1284 {
1285         switch (ovl->strategy) {
1286         case TC_CBQ_OVL_CLASSIC:
1287                 cl->overlimit = cbq_ovl_classic;
1288                 break;
1289         case TC_CBQ_OVL_DELAY:
1290                 cl->overlimit = cbq_ovl_delay;
1291                 break;
1292         case TC_CBQ_OVL_LOWPRIO:
1293                 if (ovl->priority2 - 1 >= TC_CBQ_MAXPRIO ||
1294                     ovl->priority2 - 1 <= cl->priority)
1295                         return -EINVAL;
1296                 cl->priority2 = ovl->priority2 - 1;
1297                 cl->overlimit = cbq_ovl_lowprio;
1298                 break;
1299         case TC_CBQ_OVL_DROP:
1300                 cl->overlimit = cbq_ovl_drop;
1301                 break;
1302         case TC_CBQ_OVL_RCLASSIC:
1303                 cl->overlimit = cbq_ovl_rclassic;
1304                 break;
1305         default:
1306                 return -EINVAL;
1307         }
1308         cl->penalty = ovl->penalty;
1309         return 0;
1310 }
1311 
1312 #ifdef CONFIG_NET_CLS_ACT
1313 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1314 {
1315         cl->police = p->police;
1316 
1317         if (cl->q->handle) {
1318                 if (p->police == TC_POLICE_RECLASSIFY)
1319                         cl->q->reshape_fail = cbq_reshape_fail;
1320                 else
1321                         cl->q->reshape_fail = NULL;
1322         }
1323         return 0;
1324 }
1325 #endif
1326 
1327 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1328 {
1329         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1330         return 0;
1331 }
1332 
1333 static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
1334         [TCA_CBQ_LSSOPT]        = { .len = sizeof(struct tc_cbq_lssopt) },
1335         [TCA_CBQ_WRROPT]        = { .len = sizeof(struct tc_cbq_wrropt) },
1336         [TCA_CBQ_FOPT]          = { .len = sizeof(struct tc_cbq_fopt) },
1337         [TCA_CBQ_OVL_STRATEGY]  = { .len = sizeof(struct tc_cbq_ovl) },
1338         [TCA_CBQ_RATE]          = { .len = sizeof(struct tc_ratespec) },
1339         [TCA_CBQ_RTAB]          = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
1340         [TCA_CBQ_POLICE]        = { .len = sizeof(struct tc_cbq_police) },
1341 };
1342 
1343 static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1], struct nlattr *opt)
1344 {
1345         int err;
1346 
1347         if (!opt)
1348                 return -EINVAL;
1349 
1350         err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy);
1351         if (err < 0)
1352                 return err;
1353 
1354         if (tb[TCA_CBQ_WRROPT]) {
1355                 const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
1356 
1357                 if (wrr->priority > TC_CBQ_MAXPRIO)
1358                         err = -EINVAL;
1359         }
1360         return err;
1361 }
1362 
1363 static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
1364 {
1365         struct cbq_sched_data *q = qdisc_priv(sch);
1366         struct nlattr *tb[TCA_CBQ_MAX + 1];
1367         struct tc_ratespec *r;
1368         int err;
1369 
1370         err = cbq_opt_parse(tb, opt);
1371         if (err < 0)
1372                 return err;
1373 
1374         if (tb[TCA_CBQ_RTAB] == NULL || tb[TCA_CBQ_RATE] == NULL)
1375                 return -EINVAL;
1376 
1377         r = nla_data(tb[TCA_CBQ_RATE]);
1378 
1379         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB])) == NULL)
1380                 return -EINVAL;
1381 
1382         err = qdisc_class_hash_init(&q->clhash);
1383         if (err < 0)
1384                 goto put_rtab;
1385 
1386         q->link.refcnt = 1;
1387         q->link.sibling = &q->link;
1388         q->link.common.classid = sch->handle;
1389         q->link.qdisc = sch;
1390         q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
1391                                       sch->handle);
1392         if (!q->link.q)
1393                 q->link.q = &noop_qdisc;
1394 
1395         q->link.priority = TC_CBQ_MAXPRIO - 1;
1396         q->link.priority2 = TC_CBQ_MAXPRIO - 1;
1397         q->link.cpriority = TC_CBQ_MAXPRIO - 1;
1398         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1399         q->link.overlimit = cbq_ovl_classic;
1400         q->link.allot = psched_mtu(qdisc_dev(sch));
1401         q->link.quantum = q->link.allot;
1402         q->link.weight = q->link.R_tab->rate.rate;
1403 
1404         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1405         q->link.avpkt = q->link.allot/2;
1406         q->link.minidle = -0x7FFFFFFF;
1407 
1408         qdisc_watchdog_init(&q->watchdog, sch);
1409         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
1410         q->delay_timer.function = cbq_undelay;
1411         q->toplevel = TC_CBQ_MAXLEVEL;
1412         q->now = psched_get_time();
1413 
1414         cbq_link_class(&q->link);
1415 
1416         if (tb[TCA_CBQ_LSSOPT])
1417                 cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
1418 
1419         cbq_addprio(q, &q->link);
1420         return 0;
1421 
1422 put_rtab:
1423         qdisc_put_rtab(q->link.R_tab);
1424         return err;
1425 }
1426 
1427 static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1428 {
1429         unsigned char *b = skb_tail_pointer(skb);
1430 
1431         if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
1432                 goto nla_put_failure;
1433         return skb->len;
1434 
1435 nla_put_failure:
1436         nlmsg_trim(skb, b);
1437         return -1;
1438 }
1439 
1440 static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1441 {
1442         unsigned char *b = skb_tail_pointer(skb);
1443         struct tc_cbq_lssopt opt;
1444 
1445         opt.flags = 0;
1446         if (cl->borrow == NULL)
1447                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1448         if (cl->share == NULL)
1449                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1450         opt.ewma_log = cl->ewma_log;
1451         opt.level = cl->level;
1452         opt.avpkt = cl->avpkt;
1453         opt.maxidle = cl->maxidle;
1454         opt.minidle = (u32)(-cl->minidle);
1455         opt.offtime = cl->offtime;
1456         opt.change = ~0;
1457         if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
1458                 goto nla_put_failure;
1459         return skb->len;
1460 
1461 nla_put_failure:
1462         nlmsg_trim(skb, b);
1463         return -1;
1464 }
1465 
1466 static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1467 {
1468         unsigned char *b = skb_tail_pointer(skb);
1469         struct tc_cbq_wrropt opt;
1470 
1471         memset(&opt, 0, sizeof(opt));
1472         opt.flags = 0;
1473         opt.allot = cl->allot;
1474         opt.priority = cl->priority + 1;
1475         opt.cpriority = cl->cpriority + 1;
1476         opt.weight = cl->weight;
1477         if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
1478                 goto nla_put_failure;
1479         return skb->len;
1480 
1481 nla_put_failure:
1482         nlmsg_trim(skb, b);
1483         return -1;
1484 }
1485 
1486 static int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1487 {
1488         unsigned char *b = skb_tail_pointer(skb);
1489         struct tc_cbq_ovl opt;
1490 
1491         opt.strategy = cl->ovl_strategy;
1492         opt.priority2 = cl->priority2 + 1;
1493         opt.pad = 0;
1494         opt.penalty = cl->penalty;
1495         if (nla_put(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt))
1496                 goto nla_put_failure;
1497         return skb->len;
1498 
1499 nla_put_failure:
1500         nlmsg_trim(skb, b);
1501         return -1;
1502 }
1503 
1504 static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1505 {
1506         unsigned char *b = skb_tail_pointer(skb);
1507         struct tc_cbq_fopt opt;
1508 
1509         if (cl->split || cl->defmap) {
1510                 opt.split = cl->split ? cl->split->common.classid : 0;
1511                 opt.defmap = cl->defmap;
1512                 opt.defchange = ~0;
1513                 if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
1514                         goto nla_put_failure;
1515         }
1516         return skb->len;
1517 
1518 nla_put_failure:
1519         nlmsg_trim(skb, b);
1520         return -1;
1521 }
1522 
1523 #ifdef CONFIG_NET_CLS_ACT
1524 static int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char *b = skb_tail_pointer(skb);
1527         struct tc_cbq_police opt;
1528 
1529         if (cl->police) {
1530                 opt.police = cl->police;
1531                 opt.__res1 = 0;
1532                 opt.__res2 = 0;
1533                 if (nla_put(skb, TCA_CBQ_POLICE, sizeof(opt), &opt))
1534                         goto nla_put_failure;
1535         }
1536         return skb->len;
1537 
1538 nla_put_failure:
1539         nlmsg_trim(skb, b);
1540         return -1;
1541 }
1542 #endif
1543 
1544 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1545 {
1546         if (cbq_dump_lss(skb, cl) < 0 ||
1547             cbq_dump_rate(skb, cl) < 0 ||
1548             cbq_dump_wrr(skb, cl) < 0 ||
1549             cbq_dump_ovl(skb, cl) < 0 ||
1550 #ifdef CONFIG_NET_CLS_ACT
1551             cbq_dump_police(skb, cl) < 0 ||
1552 #endif
1553             cbq_dump_fopt(skb, cl) < 0)
1554                 return -1;
1555         return 0;
1556 }
1557 
1558 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1559 {
1560         struct cbq_sched_data *q = qdisc_priv(sch);
1561         struct nlattr *nest;
1562 
1563         nest = nla_nest_start(skb, TCA_OPTIONS);
1564         if (nest == NULL)
1565                 goto nla_put_failure;
1566         if (cbq_dump_attr(skb, &q->link) < 0)
1567                 goto nla_put_failure;
1568         return nla_nest_end(skb, nest);
1569 
1570 nla_put_failure:
1571         nla_nest_cancel(skb, nest);
1572         return -1;
1573 }
1574 
1575 static int
1576 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1577 {
1578         struct cbq_sched_data *q = qdisc_priv(sch);
1579 
1580         q->link.xstats.avgidle = q->link.avgidle;
1581         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1582 }
1583 
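/* Note: the xstats blob copied out here (for the root link class) and by
 * cbq_dump_class_stats() below is the CBQ-specific struct tc_cbq_xstats
 * (borrows, overactions, avgidle, undertime); gnet_stats_copy_app() appends
 * it as the application-specific statistics attribute that userspace tc
 * decodes when printing class statistics.
 */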
1584 static int
1585 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1586                struct sk_buff *skb, struct tcmsg *tcm)
1587 {
1588         struct cbq_class *cl = (struct cbq_class *)arg;
1589         struct nlattr *nest;
1590 
1591         if (cl->tparent)
1592                 tcm->tcm_parent = cl->tparent->common.classid;
1593         else
1594                 tcm->tcm_parent = TC_H_ROOT;
1595         tcm->tcm_handle = cl->common.classid;
1596         tcm->tcm_info = cl->q->handle;
1597 
1598         nest = nla_nest_start(skb, TCA_OPTIONS);
1599         if (nest == NULL)
1600                 goto nla_put_failure;
1601         if (cbq_dump_attr(skb, cl) < 0)
1602                 goto nla_put_failure;
1603         return nla_nest_end(skb, nest);
1604 
1605 nla_put_failure:
1606         nla_nest_cancel(skb, nest);
1607         return -1;
1608 }
1609 
1610 static int
1611 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1612         struct gnet_dump *d)
1613 {
1614         struct cbq_sched_data *q = qdisc_priv(sch);
1615         struct cbq_class *cl = (struct cbq_class *)arg;
1616 
1617         cl->xstats.avgidle = cl->avgidle;
1618         cl->xstats.undertime = 0;
1619 
1620         if (cl->undertime != PSCHED_PASTPERFECT)
1621                 cl->xstats.undertime = cl->undertime - q->now;
1622 
1623         if (gnet_stats_copy_basic(d, NULL, &cl->bstats) < 0 ||
1624             gnet_stats_copy_rate_est(d, &cl->bstats, &cl->rate_est) < 0 ||
1625             gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
1626                 return -1;
1627 
1628         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1629 }
1630 
1631 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1632                      struct Qdisc **old)
1633 {
1634         struct cbq_class *cl = (struct cbq_class *)arg;
1635 
1636         if (new == NULL) {
1637                 new = qdisc_create_dflt(sch->dev_queue,
1638                                         &pfifo_qdisc_ops, cl->common.classid);
1639                 if (new == NULL)
1640                         return -ENOBUFS;
1641         } else {
1642 #ifdef CONFIG_NET_CLS_ACT
1643                 if (cl->police == TC_POLICE_RECLASSIFY)
1644                         new->reshape_fail = cbq_reshape_fail;
1645 #endif
1646         }
1647 
1648         *old = qdisc_replace(sch, new, &cl->q);
1649         return 0;
1650 }
1651 
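/* Note on grafting: when no replacement qdisc is supplied, a default pfifo is
 * created for the class; otherwise qdisc_replace() swaps the child under the
 * qdisc tree lock and hands back the previous child so the caller can dispose
 * of it.
 */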
1652 static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
1653 {
1654         struct cbq_class *cl = (struct cbq_class *)arg;
1655 
1656         return cl->q;
1657 }
1658 
1659 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1660 {
1661         struct cbq_class *cl = (struct cbq_class *)arg;
1662 
1663         if (cl->q->q.qlen == 0)
1664                 cbq_deactivate_class(cl);
1665 }
1666 
1667 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1668 {
1669         struct cbq_sched_data *q = qdisc_priv(sch);
1670         struct cbq_class *cl = cbq_class_lookup(q, classid);
1671 
1672         if (cl) {
1673                 cl->refcnt++;
1674                 return (unsigned long)cl;
1675         }
1676         return 0;
1677 }
1678 
1679 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1680 {
1681         struct cbq_sched_data *q = qdisc_priv(sch);
1682 
1683         WARN_ON(cl->filters);
1684 
1685         tcf_destroy_chain(&cl->filter_list);
1686         qdisc_destroy(cl->q);
1687         qdisc_put_rtab(cl->R_tab);
1688         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1689         if (cl != &q->link)
1690                 kfree(cl);
1691 }
1692 
1693 static void cbq_destroy(struct Qdisc *sch)
1694 {
1695         struct cbq_sched_data *q = qdisc_priv(sch);
1696         struct hlist_node *next;
1697         struct cbq_class *cl;
1698         unsigned int h;
1699 
1700 #ifdef CONFIG_NET_CLS_ACT
1701         q->rx_class = NULL;
1702 #endif
1703         /*
1704          * Filters must be destroyed first because we don't destroy the
1705          * classes from root to leaves, which means that filters can still
1706          * be bound to classes which have already been destroyed. --TGR '04
1707          */
1708         for (h = 0; h < q->clhash.hashsize; h++) {
1709                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
1710                         tcf_destroy_chain(&cl->filter_list);
1711         }
1712         for (h = 0; h < q->clhash.hashsize; h++) {
1713                 hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
1714                                           common.hnode)
1715                         cbq_destroy_class(sch, cl);
1716         }
1717         qdisc_class_hash_destroy(&q->clhash);
1718 }
1719 
1720 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1721 {
1722         struct cbq_class *cl = (struct cbq_class *)arg;
1723 
1724         if (--cl->refcnt == 0) {
1725 #ifdef CONFIG_NET_CLS_ACT
1726                 spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
1727                 struct cbq_sched_data *q = qdisc_priv(sch);
1728 
1729                 spin_lock_bh(root_lock);
1730                 if (q->rx_class == cl)
1731                         q->rx_class = NULL;
1732                 spin_unlock_bh(root_lock);
1733 #endif
1734 
1735                 cbq_destroy_class(sch, cl);
1736         }
1737 }
1738 
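/* Reference-count protocol: cbq_get()/cbq_put() back the cops->get()/put()
 * pair used by the tc netlink class handlers.  A class is only freed via
 * cbq_destroy_class() when the last reference is dropped, which is why
 * cbq_delete() below unlinks the class but leaves the actual freeing to the
 * final cbq_put().
 */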
1739 static int
1740 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
1741                  unsigned long *arg)
1742 {
1743         int err;
1744         struct cbq_sched_data *q = qdisc_priv(sch);
1745         struct cbq_class *cl = (struct cbq_class *)*arg;
1746         struct nlattr *opt = tca[TCA_OPTIONS];
1747         struct nlattr *tb[TCA_CBQ_MAX + 1];
1748         struct cbq_class *parent;
1749         struct qdisc_rate_table *rtab = NULL;
1750 
1751         err = cbq_opt_parse(tb, opt);
1752         if (err < 0)
1753                 return err;
1754 
1755         if (cl) {
1756                 /* Check parent */
1757                 if (parentid) {
1758                         if (cl->tparent &&
1759                             cl->tparent->common.classid != parentid)
1760                                 return -EINVAL;
1761                         if (!cl->tparent && parentid != TC_H_ROOT)
1762                                 return -EINVAL;
1763                 }
1764 
1765                 if (tb[TCA_CBQ_RATE]) {
1766                         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
1767                                               tb[TCA_CBQ_RTAB]);
1768                         if (rtab == NULL)
1769                                 return -EINVAL;
1770                 }
1771 
1772                 if (tca[TCA_RATE]) {
1773                         err = gen_replace_estimator(&cl->bstats, NULL,
1774                                                     &cl->rate_est,
1775                                                     qdisc_root_sleeping_lock(sch),
1776                                                     tca[TCA_RATE]);
1777                         if (err) {
1778                                 qdisc_put_rtab(rtab);
1779                                 return err;
1780                         }
1781                 }
1782 
1783                 /* Change class parameters */
1784                 sch_tree_lock(sch);
1785 
1786                 if (cl->next_alive != NULL)
1787                         cbq_deactivate_class(cl);
1788 
1789                 if (rtab) {
1790                         qdisc_put_rtab(cl->R_tab);
1791                         cl->R_tab = rtab;
1792                 }
1793 
1794                 if (tb[TCA_CBQ_LSSOPT])
1795                         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1796 
1797                 if (tb[TCA_CBQ_WRROPT]) {
1798                         cbq_rmprio(q, cl);
1799                         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1800                 }
1801 
1802                 if (tb[TCA_CBQ_OVL_STRATEGY])
1803                         cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1804 
1805 #ifdef CONFIG_NET_CLS_ACT
1806                 if (tb[TCA_CBQ_POLICE])
1807                         cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1808 #endif
1809 
1810                 if (tb[TCA_CBQ_FOPT])
1811                         cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1812 
1813                 if (cl->q->q.qlen)
1814                         cbq_activate_class(cl);
1815 
1816                 sch_tree_unlock(sch);
1817 
1818                 return 0;
1819         }
1820 
1821         if (parentid == TC_H_ROOT)
1822                 return -EINVAL;
1823 
1824         if (tb[TCA_CBQ_WRROPT] == NULL || tb[TCA_CBQ_RATE] == NULL ||
1825             tb[TCA_CBQ_LSSOPT] == NULL)
1826                 return -EINVAL;
1827 
1828         rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB]);
1829         if (rtab == NULL)
1830                 return -EINVAL;
1831 
1832         if (classid) {
1833                 err = -EINVAL;
1834                 if (TC_H_MAJ(classid ^ sch->handle) ||
1835                     cbq_class_lookup(q, classid))
1836                         goto failure;
1837         } else {
1838                 int i;
1839                 classid = TC_H_MAKE(sch->handle, 0x8000);
1840 
1841                 for (i = 0; i < 0x8000; i++) {
1842                         if (++q->hgenerator >= 0x8000)
1843                                 q->hgenerator = 1;
1844                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1845                                 break;
1846                 }
1847                 err = -ENOSR;
1848                 if (i >= 0x8000)
1849                         goto failure;
1850                 classid = classid|q->hgenerator;
1851         }
1852 
1853         parent = &q->link;
1854         if (parentid) {
1855                 parent = cbq_class_lookup(q, parentid);
1856                 err = -EINVAL;
1857                 if (parent == NULL)
1858                         goto failure;
1859         }
1860 
1861         err = -ENOBUFS;
1862         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1863         if (cl == NULL)
1864                 goto failure;
1865 
1866         if (tca[TCA_RATE]) {
1867                 err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
1868                                         qdisc_root_sleeping_lock(sch),
1869                                         tca[TCA_RATE]);
1870                 if (err) {
1871                         kfree(cl);
1872                         goto failure;
1873                 }
1874         }
1875 
1876         cl->R_tab = rtab;
1877         rtab = NULL;
1878         cl->refcnt = 1;
1879         cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid);
1880         if (!cl->q)
1881                 cl->q = &noop_qdisc;
1882         cl->common.classid = classid;
1883         cl->tparent = parent;
1884         cl->qdisc = sch;
1885         cl->allot = parent->allot;
1886         cl->quantum = cl->allot;
1887         cl->weight = cl->R_tab->rate.rate;
1888 
1889         sch_tree_lock(sch);
1890         cbq_link_class(cl);
1891         cl->borrow = cl->tparent;
1892         if (cl->tparent != &q->link)
1893                 cl->share = cl->tparent;
1894         cbq_adjust_levels(parent);
1895         cl->minidle = -0x7FFFFFFF;
1896         cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
1897         cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
1898         if (cl->ewma_log == 0)
1899                 cl->ewma_log = q->link.ewma_log;
1900         if (cl->maxidle == 0)
1901                 cl->maxidle = q->link.maxidle;
1902         if (cl->avpkt == 0)
1903                 cl->avpkt = q->link.avpkt;
1904         cl->overlimit = cbq_ovl_classic;
1905         if (tb[TCA_CBQ_OVL_STRATEGY])
1906                 cbq_set_overlimit(cl, nla_data(tb[TCA_CBQ_OVL_STRATEGY]));
1907 #ifdef CONFIG_NET_CLS_ACT
1908         if (tb[TCA_CBQ_POLICE])
1909                 cbq_set_police(cl, nla_data(tb[TCA_CBQ_POLICE]));
1910 #endif
1911         if (tb[TCA_CBQ_FOPT])
1912                 cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
1913         sch_tree_unlock(sch);
1914 
1915         qdisc_class_hash_grow(sch, &q->clhash);
1916 
1917         *arg = (unsigned long)cl;
1918         return 0;
1919 
1920 failure:
1921         qdisc_put_rtab(rtab);
1922         return err;
1923 }
1924 
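/* For context: new classes created through cbq_change_class() must carry
 * TCA_CBQ_RATE, TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT.  An illustrative
 * LARTC-style command that supplies them (placeholder device, handles and
 * rates) might be:
 *
 *   tc class add dev eth0 parent 1: classid 1:10 cbq bandwidth 100Mbit \
 *      rate 6Mbit weight 0.6Mbit prio 5 allot 1514 avpkt 1000 bounded
 *
 * "rate" becomes TCA_CBQ_RATE/TCA_CBQ_RTAB, "weight"/"prio"/"allot" go into
 * TCA_CBQ_WRROPT, and "avpkt"/"bounded" are carried in TCA_CBQ_LSSOPT.
 */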
1925 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1926 {
1927         struct cbq_sched_data *q = qdisc_priv(sch);
1928         struct cbq_class *cl = (struct cbq_class *)arg;
1929         unsigned int qlen, backlog;
1930 
1931         if (cl->filters || cl->children || cl == &q->link)
1932                 return -EBUSY;
1933 
1934         sch_tree_lock(sch);
1935 
1936         qlen = cl->q->q.qlen;
1937         backlog = cl->q->qstats.backlog;
1938         qdisc_reset(cl->q);
1939         qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
1940 
1941         if (cl->next_alive)
1942                 cbq_deactivate_class(cl);
1943 
1944         if (q->tx_borrowed == cl)
1945                 q->tx_borrowed = q->tx_class;
1946         if (q->tx_class == cl) {
1947                 q->tx_class = NULL;
1948                 q->tx_borrowed = NULL;
1949         }
1950 #ifdef CONFIG_NET_CLS_ACT
1951         if (q->rx_class == cl)
1952                 q->rx_class = NULL;
1953 #endif
1954 
1955         cbq_unlink_class(cl);
1956         cbq_adjust_levels(cl->tparent);
1957         cl->defmap = 0;
1958         cbq_sync_defmap(cl);
1959 
1960         cbq_rmprio(q, cl);
1961         sch_tree_unlock(sch);
1962 
1963         BUG_ON(--cl->refcnt == 0);
1964         /*
1965          * This shouldn't happen: we "hold" one cops->get() when called
1966          * from tc_ctl_tclass; the destroy method is done from cops->put().
1967          */
1968 
1969         return 0;
1970 }
1971 
1972 static struct tcf_proto __rcu **cbq_find_tcf(struct Qdisc *sch,
1973                                              unsigned long arg)
1974 {
1975         struct cbq_sched_data *q = qdisc_priv(sch);
1976         struct cbq_class *cl = (struct cbq_class *)arg;
1977 
1978         if (cl == NULL)
1979                 cl = &q->link;
1980 
1981         return &cl->filter_list;
1982 }
1983 
1984 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
1985                                      u32 classid)
1986 {
1987         struct cbq_sched_data *q = qdisc_priv(sch);
1988         struct cbq_class *p = (struct cbq_class *)parent;
1989         struct cbq_class *cl = cbq_class_lookup(q, classid);
1990 
1991         if (cl) {
1992                 if (p && p->level <= cl->level)
1993                         return 0;
1994                 cl->filters++;
1995                 return (unsigned long)cl;
1996         }
1997         return 0;
1998 }
1999 
2000 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2001 {
2002         struct cbq_class *cl = (struct cbq_class *)arg;
2003 
2004         cl->filters--;
2005 }
2006 
2007 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2008 {
2009         struct cbq_sched_data *q = qdisc_priv(sch);
2010         struct cbq_class *cl;
2011         unsigned int h;
2012 
2013         if (arg->stop)
2014                 return;
2015 
2016         for (h = 0; h < q->clhash.hashsize; h++) {
2017                 hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
2018                         if (arg->count < arg->skip) {
2019                                 arg->count++;
2020                                 continue;
2021                         }
2022                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2023                                 arg->stop = 1;
2024                                 return;
2025                         }
2026                         arg->count++;
2027                 }
2028         }
2029 }
2030 
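/* Minimal sketch (not part of this file) of how the ->walk() operation above
 * is typically consumed; the "example_" names are hypothetical.  cbq_walk()
 * increments arg->count for every class and honours arg->skip/arg->stop, so a
 * trivial callback is enough to count classes:
 */
#if 0
static int example_count_one(struct Qdisc *sch, unsigned long cl,
			     struct qdisc_walker *w)
{
	return 0;	/* a negative return would stop the walk */
}

static unsigned int example_count_classes(struct Qdisc *sch)
{
	struct qdisc_walker w = { .fn = example_count_one };

	sch->ops->cl_ops->walk(sch, &w);
	return w.count;
}
#endif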
2031 static const struct Qdisc_class_ops cbq_class_ops = {
2032         .graft          =       cbq_graft,
2033         .leaf           =       cbq_leaf,
2034         .qlen_notify    =       cbq_qlen_notify,
2035         .get            =       cbq_get,
2036         .put            =       cbq_put,
2037         .change         =       cbq_change_class,
2038         .delete         =       cbq_delete,
2039         .walk           =       cbq_walk,
2040         .tcf_chain      =       cbq_find_tcf,
2041         .bind_tcf       =       cbq_bind_filter,
2042         .unbind_tcf     =       cbq_unbind_filter,
2043         .dump           =       cbq_dump_class,
2044         .dump_stats     =       cbq_dump_class_stats,
2045 };
2046 
2047 static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
2048         .next           =       NULL,
2049         .cl_ops         =       &cbq_class_ops,
2050         .id             =       "cbq",
2051         .priv_size      =       sizeof(struct cbq_sched_data),
2052         .enqueue        =       cbq_enqueue,
2053         .dequeue        =       cbq_dequeue,
2054         .peek           =       qdisc_peek_dequeued,
2055         .drop           =       cbq_drop,
2056         .init           =       cbq_init,
2057         .reset          =       cbq_reset,
2058         .destroy        =       cbq_destroy,
2059         .change         =       NULL,
2060         .dump           =       cbq_dump,
2061         .dump_stats     =       cbq_dump_stats,
2062         .owner          =       THIS_MODULE,
2063 };
2064 
2065 static int __init cbq_module_init(void)
2066 {
2067         return register_qdisc(&cbq_qdisc_ops);
2068 }
2069 static void __exit cbq_module_exit(void)
2070 {
2071         unregister_qdisc(&cbq_qdisc_ops);
2072 }
2073 module_init(cbq_module_init)
2074 module_exit(cbq_module_exit)
2075 MODULE_LICENSE("GPL");
2076 
