/*
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/red.h>


/*      Stochastic Fairness Queuing algorithm.
        =======================================

        Source:
        Paul E. McKenney "Stochastic Fairness Queuing",
        IEEE INFOCOM'90 Proceedings, San Francisco, 1990.

        Paul E. McKenney "Stochastic Fairness Queuing",
        "Interworking: Research and Experience", v.2, 1991, p.113-131.


        See also:
        M. Shreedhar and George Varghese "Efficient Fair
        Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


        This is not the scheme that is usually called (W)FQ nowadays.
        It does not use any timestamp mechanism, but instead
        processes queues in round-robin order.

        ADVANTAGE:

        - It is very cheap. Both CPU and memory requirements are minimal.

        DRAWBACKS:

        - "Stochastic" -> It is not 100% fair.
        When hash collisions occur, several flows are considered as one.

        - "Round-robin" -> It introduces larger delays than virtual clock
        based schemes, and should not be used for isolating interactive
        traffic from non-interactive. This means that this scheduler
        should be used as a leaf of CBQ or P3, which put interactive
        traffic into a higher priority band.

        We still need true WFQ for the top level CSZ, but using WFQ
        for best effort traffic is absolutely pointless:
        SFQ is superior for this purpose.

        IMPLEMENTATION:
        This implementation limits:
        - the maximal queue length per flow to 127 packets;
        - the max mtu to 2^18-1;
        - the number of flows to 65408;
        - the number of hash buckets to 65536.

        It is easy to increase these values, but not in flight.  */

#define SFQ_MAX_DEPTH           127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS       128
#define SFQ_MAX_FLOWS           (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT          0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1<<3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT         3
#define SFQ_ALLOT_SIZE(X)       DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
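
/* Worked example (illustration, not part of the original source): with
 * SFQ_ALLOT_SHIFT == 3, a 1514 byte frame costs SFQ_ALLOT_SIZE(1514) ==
 * DIV_ROUND_UP(1514, 8) == 190 scaled units, and a maximal 64K packet
 * costs 8192 units, so the scaled credit always fits in the 16-bit
 * allot field of struct sfq_slot below.
 */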

/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;
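
/* Capacity check (informal, added for illustration): SFQ_MAX_FLOWS +
 * SFQ_MAX_DEPTH + 1 == 65408 + 127 + 1 == 65536 == 0x10000, so a u16
 * sfq_index can encode every slot index plus every dep[] index exactly.
 */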

/*
 * We don't use pointers to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 * are 'pointers' to dep[] array
 */
struct sfq_head {
        sfq_index       next;
        sfq_index       prev;
};

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq RR chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */

        unsigned int    backlog;
        struct red_vars vars;
};

struct sfq_sched_data {
/* frequently used fields */
        int             limit;          /* limit of total number of packets in this qdisc */
        unsigned int    divisor;        /* number of slots in hash table */
        u8              headdrop;
        u8              maxdepth;       /* limit of packets per flow */

        u32             perturbation;
        u8              cur_depth;      /* depth of longest slot */
        u8              flags;
        unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
        struct tcf_proto __rcu *filter_list;
        sfq_index       *ht;            /* Hash table ('divisor' slots) */
        struct sfq_slot *slots;         /* Flows table ('maxflows' entries) */

        struct red_parms *red_parms;
        struct tc_sfqred_stats stats;
        struct sfq_slot *tail;          /* current slot in round */

        struct sfq_head dep[SFQ_MAX_DEPTH + 1];
                                        /* Linked lists of slots, indexed by depth
                                         * dep[0] : list of unused flows
                                         * dep[1] : list of flows with 1 packet
                                         * dep[X] : list of flows with X packets
                                         */

        unsigned int    maxflows;       /* number of flows in flows array */
        int             perturb_period;
        unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
        struct timer_list perturb_timer;
};

/*
 * sfq_head are either in a sfq_slot or in dep[] array
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
        if (val < SFQ_MAX_FLOWS)
                return &q->slots[val].dep;
        return &q->dep[val - SFQ_MAX_FLOWS];
}
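
/* Decoding example (illustration, not part of the original source): with
 * SFQ_MAX_FLOWS == 65408, sfq_dep_head(q, 5) returns &q->slots[5].dep,
 * while sfq_dep_head(q, 65408 + 3) returns &q->dep[3], the anchor of the
 * list of flows currently holding exactly 3 packets.
 */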

static unsigned int sfq_hash(const struct sfq_sched_data *q,
                             const struct sk_buff *skb)
{
        return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                                 int *qerr)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tcf_result res;
        struct tcf_proto *fl;
        int result;

        if (TC_H_MAJ(skb->priority) == sch->handle &&
            TC_H_MIN(skb->priority) > 0 &&
            TC_H_MIN(skb->priority) <= q->divisor)
                return TC_H_MIN(skb->priority);

        fl = rcu_dereference_bh(q->filter_list);
        if (!fl)
                return sfq_hash(q, skb) + 1;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        result = tc_classify(skb, fl, &res, false);
        if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_STOLEN:
                case TC_ACT_QUEUED:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
                        /* fall through */
                case TC_ACT_SHOT:
                        return 0;
                }
#endif
                if (TC_H_MIN(res.classid) <= q->divisor)
                        return TC_H_MIN(res.classid);
        }
        return 0;
}
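
/* Return convention (informal note, added for clarity): 0 means "drop the
 * packet" (a filter verdict or an out-of-range classid), while values
 * 1 .. q->divisor are a hash bucket number offset by one; the caller in
 * sfq_enqueue() subtracts 1 before indexing ht[].
 */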

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        struct sfq_slot *slot = &q->slots[x];
        int qlen = slot->qlen;

        p = qlen + SFQ_MAX_FLOWS;
        n = q->dep[qlen].next;

        slot->dep.next = n;
        slot->dep.prev = p;

        q->dep[qlen].next = x;          /* sfq_dep_head(q, p)->next = x */
        sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)                  \
        do {                                    \
                n = q->slots[x].dep.next;       \
                p = q->slots[x].dep.prev;       \
                sfq_dep_head(q, p)->next = n;   \
                sfq_dep_head(q, n)->prev = p;   \
        } while (0)


static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = q->slots[x].qlen--;
        /* n == p means slot x was the only entry in the dep[d] chain */
        if (n == p && q->cur_depth == d)
                q->cur_depth--;
        sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = ++q->slots[x].qlen;
        if (q->cur_depth < d)
                q->cur_depth = d;
        sfq_link(q, x);
}
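
/* Bookkeeping example (illustration, not part of the original source):
 * when a flow grows from 2 to 3 queued packets, sfq_inc() unlinks it from
 * the dep[2] chain and relinks it at the head of dep[3]; cur_depth tracks
 * the deepest non-empty chain, so sfq_drop() below can locate one of the
 * longest flows in O(1).
 */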

/* helper functions : might be changed when/if skbs use a standard list_head */

/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_prev;

        slot->skblist_prev = skb->prev;
        skb->prev->next = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_next;

        slot->skblist_next = skb->next;
        skb->next->prev = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
        memset(slot, 0, sizeof(*slot));
        slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
        skb->prev = slot->skblist_prev;
        skb->next = (struct sk_buff *)slot;
        slot->skblist_prev->next = skb;
        slot->skblist_prev = skb;
}
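
/* Layout note (informal, added for clarity): these helpers cast the slot
 * itself to struct sk_buff * and use it as the sentinel of a circular
 * doubly linked list. This works because skblist_next/skblist_prev are
 * the first two members of struct sfq_slot, mirroring the next/prev
 * pointers at the start of struct sk_buff, much like sk_buff_head.
 */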

static unsigned int sfq_drop(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index x, d = q->cur_depth;
        struct sk_buff *skb;
        unsigned int len;
        struct sfq_slot *slot;

        /* Queue is full! Find the longest slot and drop a packet from it */
        if (d > 1) {
                x = q->dep[d].next;
                slot = &q->slots[x];
drop:
                skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
                len = qdisc_pkt_len(skb);
                slot->backlog -= len;
                sfq_dec(q, x);
                sch->q.qlen--;
                qdisc_qstats_drop(sch);
                qdisc_qstats_backlog_dec(sch, skb);
                kfree_skb(skb);
                return len;
        }

        if (d == 1) {
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                x = q->tail->next;
                slot = &q->slots[x];
                q->tail->next = slot->next;
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                goto drop;
        }

        return 0;
}

/* Is the ECN parameter configured? */
static int sfq_prob_mark(const struct sfq_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold just be marked? */
static int sfq_hard_mark(const struct sfq_sched_data *q)
{
        return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}

static int sfq_headdrop(const struct sfq_sched_data *q)
{
        return q->headdrop;
}
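
/* Flag summary (informal note, added for clarity):
 *   TC_RED_ECN set, TC_RED_HARDDROP clear -> mark at both thresholds;
 *   TC_RED_ECN set, TC_RED_HARDDROP set   -> mark below max, drop above;
 *   TC_RED_ECN clear                      -> always drop on congestion.
 */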

static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int hash, dropped;
        sfq_index x, qlen;
        struct sfq_slot *slot;
        int uninitialized_var(ret);
        struct sk_buff *head;
        int delta;

        hash = sfq_classify(skb, sch, &ret);
        if (hash == 0) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
                kfree_skb(skb);
                return ret;
        }
        hash--;

        x = q->ht[hash];
        slot = &q->slots[x];
        if (x == SFQ_EMPTY_SLOT) {
                x = q->dep[0].next; /* get a free slot */
                if (x >= SFQ_MAX_FLOWS)
                        return qdisc_drop(skb, sch, to_free);
                q->ht[hash] = x;
                slot = &q->slots[x];
                slot->hash = hash;
                slot->backlog = 0; /* should already be 0 anyway... */
                red_set_vars(&slot->vars);
                goto enqueue;
        }
        if (q->red_parms) {
                slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                switch (red_action(q->red_parms,
                                   &slot->vars,
                                   slot->vars.qavg)) {
                case RED_DONT_MARK:
                        break;

                case RED_PROB_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_prob_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.prob_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.prob_mark++;
                                        break;
                                }
                        }
                        q->stats.prob_drop++;
                        goto congestion_drop;

                case RED_HARD_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_hard_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.forced_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.forced_mark++;
                                        break;
                                }
                        }
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }
        }

        if (slot->qlen >= q->maxdepth) {
congestion_drop:
                if (!sfq_headdrop(q))
                        return qdisc_drop(skb, sch, to_free);

                /* We know we have at least one packet in queue */
                head = slot_dequeue_head(slot);
                delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
                sch->qstats.backlog -= delta;
                slot->backlog -= delta;
                qdisc_drop(head, sch, to_free);

                slot_queue_add(slot, skb);
                qdisc_tree_reduce_backlog(sch, 0, delta);
                return NET_XMIT_CN;
        }

enqueue:
        qdisc_qstats_backlog_inc(sch, skb);
        slot->backlog += qdisc_pkt_len(skb);
        slot_queue_add(slot, skb);
        sfq_inc(q, x);
        if (slot->qlen == 1) {          /* The flow is new */
                if (q->tail == NULL) {  /* It is the first flow */
                        slot->next = x;
                } else {
                        slot->next = q->tail->next;
                        q->tail->next = x;
                }
                /* We put this flow at the end of our flow list.
                 * This might sound unfair for a new flow to wait after old ones,
                 * but we could end up servicing new flows only, and freeze old ones.
                 */
                q->tail = slot;
                /* We could use a bigger initial quantum for new flows */
                slot->allot = q->scaled_quantum;
        }
        if (++sch->q.qlen <= q->limit)
                return NET_XMIT_SUCCESS;

        qlen = slot->qlen;
        dropped = sfq_drop(sch);
        /* Return Congestion Notification only if we dropped a packet
         * from this flow.
         */
        if (qlen != slot->qlen) {
                qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
                return NET_XMIT_CN;
        }

        /* As we dropped a packet, better let upper stack know this */
        qdisc_tree_reduce_backlog(sch, 1, dropped);
        return NET_XMIT_SUCCESS;
}

static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        sfq_index a, next_a;
        struct sfq_slot *slot;

        /* No active slots */
        if (q->tail == NULL)
                return NULL;

next_slot:
        a = q->tail->next;
        slot = &q->slots[a];
        if (slot->allot <= 0) {
                q->tail = slot;
                slot->allot += q->scaled_quantum;
                goto next_slot;
        }
        skb = slot_dequeue_head(slot);
        sfq_dec(q, a);
        qdisc_bstats_update(sch, skb);
        sch->q.qlen--;
        qdisc_qstats_backlog_dec(sch, skb);
        slot->backlog -= qdisc_pkt_len(skb);
        /* Is the slot empty? */
        if (slot->qlen == 0) {
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                next_a = slot->next;
                if (a == next_a) {
                        q->tail = NULL; /* no more active slots */
                        return skb;
                }
                q->tail->next = next_a;
        } else {
                slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
        }
        return skb;
}
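
/* Deficit round-robin example (illustration, not part of the original
 * source): with quantum == 1514 the scaled quantum is
 * SFQ_ALLOT_SIZE(1514) == 190. A fresh slot starts with allot == 190;
 * dequeueing one 1514-byte packet charges 190 units, so allot drops to 0
 * and the next visit skips the slot, tops it up by another 190, and
 * moves the round on to its neighbour. Unused credit carries over to the
 * next round, which keeps the scheduling close to byte-fair.
 */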

static void
sfq_reset(struct Qdisc *sch)
{
        struct sk_buff *skb;

        while ((skb = sfq_dequeue(sch)) != NULL)
                rtnl_kfree_skbs(skb, skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to change
 * counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        int i;
        struct sfq_slot *slot;
        struct sk_buff_head list;
        int dropped = 0;
        unsigned int drop_len = 0;

        __skb_queue_head_init(&list);

        for (i = 0; i < q->maxflows; i++) {
                slot = &q->slots[i];
                if (!slot->qlen)
                        continue;
                while (slot->qlen) {
                        skb = slot_dequeue_head(slot);
                        sfq_dec(q, i);
                        __skb_queue_tail(&list, skb);
                }
                slot->backlog = 0;
                red_set_vars(&slot->vars);
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
        }
        q->tail = NULL;

        while ((skb = __skb_dequeue(&list)) != NULL) {
                unsigned int hash = sfq_hash(q, skb);
                sfq_index x = q->ht[hash];

                slot = &q->slots[x];
                if (x == SFQ_EMPTY_SLOT) {
                        x = q->dep[0].next; /* get a free slot */
                        if (x >= SFQ_MAX_FLOWS) {
drop:
                                qdisc_qstats_backlog_dec(sch, skb);
                                drop_len += qdisc_pkt_len(skb);
                                kfree_skb(skb);
                                dropped++;
                                continue;
                        }
                        q->ht[hash] = x;
                        slot = &q->slots[x];
                        slot->hash = hash;
                }
                if (slot->qlen >= q->maxdepth)
                        goto drop;
                slot_queue_add(slot, skb);
                if (q->red_parms)
                        slot->vars.qavg = red_calc_qavg(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                slot->backlog += qdisc_pkt_len(skb);
                sfq_inc(q, x);
                if (slot->qlen == 1) {          /* The flow is new */
                        if (q->tail == NULL) {  /* It is the first flow */
                                slot->next = x;
                        } else {
                                slot->next = q->tail->next;
                                q->tail->next = x;
                        }
                        q->tail = slot;
                        slot->allot = q->scaled_quantum;
                }
        }
        sch->q.qlen -= dropped;
        qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}

static void sfq_perturbation(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc *)arg;
        struct sfq_sched_data *q = qdisc_priv(sch);
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        q->perturbation = prandom_u32();
        if (!q->filter_list && q->tail)
                sfq_rehash(sch);
        spin_unlock(root_lock);

        if (q->perturb_period)
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tc_sfq_qopt *ctl = nla_data(opt);
        struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
        unsigned int qlen, dropped = 0;
        struct red_parms *p = NULL;

        if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
                return -EINVAL;
        if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
                ctl_v1 = nla_data(opt);
        if (ctl->divisor &&
            (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
                return -EINVAL;
        if (ctl_v1 && ctl_v1->qth_min) {
                p = kmalloc(sizeof(*p), GFP_KERNEL);
                if (!p)
                        return -ENOMEM;
        }
        sch_tree_lock(sch);
        if (ctl->quantum) {
                q->quantum = ctl->quantum;
                q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        }
        q->perturb_period = ctl->perturb_period * HZ;
        if (ctl->flows)
                q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
        if (ctl->divisor) {
                q->divisor = ctl->divisor;
                q->maxflows = min_t(u32, q->maxflows, q->divisor);
        }
        if (ctl_v1) {
                if (ctl_v1->depth)
                        q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
                if (p) {
                        swap(q->red_parms, p);
                        red_set_parms(q->red_parms,
                                      ctl_v1->qth_min, ctl_v1->qth_max,
                                      ctl_v1->Wlog,
                                      ctl_v1->Plog, ctl_v1->Scell_log,
                                      NULL,
                                      ctl_v1->max_P);
                }
                q->flags = ctl_v1->flags;
                q->headdrop = ctl_v1->headdrop;
        }
        if (ctl->limit) {
                q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
                q->maxflows = min_t(u32, q->maxflows, q->limit);
        }

        qlen = sch->q.qlen;
        while (sch->q.qlen > q->limit)
                dropped += sfq_drop(sch);
        qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
                q->perturbation = prandom_u32();
        }
        sch_tree_unlock(sch);
        kfree(p);
        return 0;
}
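
/* Configuration example (informal, added for illustration): a command
 * such as "tc qdisc add dev eth0 root sfq perturb 10 quantum 1514"
 * reaches sfq_change() as a struct tc_sfq_qopt with quantum == 1514 and
 * perturb_period == 10; the period is converted to jiffies (* HZ) and
 * arms perturb_timer, which reseeds the hash every 10 seconds.
 */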

static void *sfq_alloc(size_t sz)
{
        return kvmalloc(sz, GFP_KERNEL);
}

static void sfq_free(void *addr)
{
        kvfree(addr);
}

static void sfq_destroy(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        tcf_destroy_chain(&q->filter_list);
        q->perturb_period = 0;
        del_timer_sync(&q->perturb_timer);
        sfq_free(q->ht);
        sfq_free(q->slots);
        kfree(q->red_parms);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        int i;

        setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
                               (unsigned long)sch);

        for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
                q->dep[i].next = i + SFQ_MAX_FLOWS;
                q->dep[i].prev = i + SFQ_MAX_FLOWS;
        }

        q->limit = SFQ_MAX_DEPTH;
        q->maxdepth = SFQ_MAX_DEPTH;
        q->cur_depth = 0;
        q->tail = NULL;
        q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
        q->maxflows = SFQ_DEFAULT_FLOWS;
        q->quantum = psched_mtu(qdisc_dev(sch));
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        q->perturb_period = 0;
        q->perturbation = prandom_u32();

        if (opt) {
                int err = sfq_change(sch, opt);
                if (err)
                        return err;
        }

        q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
        q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
        if (!q->ht || !q->slots) {
                /* Note: sfq_destroy() will be called by our caller */
                return -ENOMEM;
        }

        for (i = 0; i < q->divisor; i++)
                q->ht[i] = SFQ_EMPTY_SLOT;

        for (i = 0; i < q->maxflows; i++) {
                slot_queue_init(&q->slots[i]);
                sfq_link(q, i);
        }
        if (q->limit >= 1)
                sch->flags |= TCQ_F_CAN_BYPASS;
        else
                sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
}
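
/* Default geometry (informal summary, derived from the code above): with
 * no options the qdisc starts with 128 flow slots hashed over 1024
 * buckets, an overall limit of 127 packets, quantum equal to the device
 * MTU, and perturbation disabled (perturb_period == 0).
 */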

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_sfq_qopt_v1 opt;
        struct red_parms *p = q->red_parms;

        memset(&opt, 0, sizeof(opt));
        opt.v0.quantum  = q->quantum;
        opt.v0.perturb_period = q->perturb_period / HZ;
        opt.v0.limit    = q->limit;
        opt.v0.divisor  = q->divisor;
        opt.v0.flows    = q->maxflows;
        opt.depth       = q->maxdepth;
        opt.headdrop    = q->headdrop;

        if (p) {
                opt.qth_min     = p->qth_min >> p->Wlog;
                opt.qth_max     = p->qth_max >> p->Wlog;
                opt.Wlog        = p->Wlog;
                opt.Plog        = p->Plog;
                opt.Scell_log   = p->Scell_log;
                opt.max_P       = p->max_P;
        }
        memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
        opt.flags       = q->flags;

        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
        return NULL;
}

static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
        return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                              u32 classid)
{
        /* we cannot bypass queue discipline anymore */
        sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
}

static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
                                             unsigned long cl)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        if (cl)
                return NULL;
        return &q->filter_list;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        tcm->tcm_handle |= TC_H_MIN(cl);
        return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                struct gnet_dump *d)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index idx = q->ht[cl - 1];
        struct gnet_stats_queue qs = { 0 };
        struct tc_sfq_xstats xstats = { 0 };

        if (idx != SFQ_EMPTY_SLOT) {
                const struct sfq_slot *slot = &q->slots[idx];

                /* convert the scaled allot back to bytes for userspace */
                xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
                qs.qlen = slot->qlen;
                qs.backlog = slot->backlog;
        }
        if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
                return -1;
        return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int i;

        if (arg->stop)
                return;

        for (i = 0; i < q->divisor; i++) {
                if (q->ht[i] == SFQ_EMPTY_SLOT ||
                    arg->count < arg->skip) {
                        arg->count++;
                        continue;
                }
                if (arg->fn(sch, i + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}

static const struct Qdisc_class_ops sfq_class_ops = {
        .leaf           =       sfq_leaf,
        .get            =       sfq_get,
        .put            =       sfq_put,
        .tcf_chain      =       sfq_find_tcf,
        .bind_tcf       =       sfq_bind,
        .unbind_tcf     =       sfq_put,
        .dump           =       sfq_dump_class,
        .dump_stats     =       sfq_dump_class_stats,
        .walk           =       sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
        .cl_ops         =       &sfq_class_ops,
        .id             =       "sfq",
        .priv_size      =       sizeof(struct sfq_sched_data),
        .enqueue        =       sfq_enqueue,
        .dequeue        =       sfq_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .init           =       sfq_init,
        .reset          =       sfq_reset,
        .destroy        =       sfq_destroy,
        .change         =       NULL,
        .dump           =       sfq_dump,
        .owner          =       THIS_MODULE,
};

static int __init sfq_module_init(void)
{
        return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
        unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
