~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/sched/sch_sfq.c

Version: ~ [ linux-5.4-rc3 ] ~ [ linux-5.3.6 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.79 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.149 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.196 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.196 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.75 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
  3  *
  4  *              This program is free software; you can redistribute it and/or
  5  *              modify it under the terms of the GNU General Public License
  6  *              as published by the Free Software Foundation; either version
  7  *              2 of the License, or (at your option) any later version.
  8  *
  9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 10  */
 11 
 12 #include <linux/module.h>
 13 #include <linux/types.h>
 14 #include <linux/kernel.h>
 15 #include <linux/jiffies.h>
 16 #include <linux/string.h>
 17 #include <linux/in.h>
 18 #include <linux/errno.h>
 19 #include <linux/init.h>
 20 #include <linux/skbuff.h>
 21 #include <linux/jhash.h>
 22 #include <linux/slab.h>
 23 #include <linux/vmalloc.h>
 24 #include <net/netlink.h>
 25 #include <net/pkt_sched.h>
 26 #include <net/pkt_cls.h>
 27 #include <net/red.h>
 28 
 29 
 30 /*      Stochastic Fairness Queuing algorithm.
 31         =======================================
 32 
 33         Source:
 34         Paul E. McKenney "Stochastic Fairness Queuing",
 35         IEEE INFOCOMM'90 Proceedings, San Francisco, 1990.
 36 
 37         Paul E. McKenney "Stochastic Fairness Queuing",
 38         "Interworking: Research and Experience", v.2, 1991, p.113-131.
 39 
 40 
 41         See also:
 42         M. Shreedhar and George Varghese "Efficient Fair
 43         Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
 44 
 45 
 46         This is not the thing that is usually called (W)FQ nowadays.
 47         It does not use any timestamp mechanism, but instead
 48         processes queues in round-robin order.
 49 
 50         ADVANTAGE:
 51 
 52         - It is very cheap. Both CPU and memory requirements are minimal.
 53 
 54         DRAWBACKS:
 55 
 56         - "Stochastic" -> It is not 100% fair.
 57         When hash collisions occur, several flows are considered as one.
 58 
 59         - "Round-robin" -> It introduces larger delays than virtual clock
 60         based schemes, and should not be used for isolating interactive
 61         traffic from non-interactive. It means, that this scheduler
 62         should be used as leaf of CBQ or P3, which put interactive traffic
 63         to higher priority band.
 64 
 65         We still need true WFQ for top level CSZ, but using WFQ
 66         for the best effort traffic is absolutely pointless:
 67         SFQ is superior for this purpose.
 68 
 69         IMPLEMENTATION:
 70         This implementation limits :
 71         - maximal queue length per flow to 127 packets.
 72         - max mtu to 2^18-1;
 73         - max 65408 flows,
 74         - number of hash buckets to 65536.
 75 
 76         It is easy to increase these values, but not in flight.  */
 77 
 78 #define SFQ_MAX_DEPTH           127 /* max number of packets per flow */
 79 #define SFQ_DEFAULT_FLOWS       128
 80 #define SFQ_MAX_FLOWS           (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
 81 #define SFQ_EMPTY_SLOT          0xffff
 82 #define SFQ_DEFAULT_HASH_DIVISOR 1024
 83 
 84 /* We use 16 bits to store allot, and want to handle packets up to 64K
 85  * Scale allot by 8 (1<<3) so that no overflow occurs.
 86  */
 87 #define SFQ_ALLOT_SHIFT         3
 88 #define SFQ_ALLOT_SIZE(X)       DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 89 
 90 /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
 91 typedef u16 sfq_index;
 92 
 93 /*
 94  * We dont use pointers to save space.
 95  * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
 96  * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
 97  * are 'pointers' to dep[] array
 98  */
 99 struct sfq_head {
100         sfq_index       next;
101         sfq_index       prev;
102 };
103 
104 struct sfq_slot {
105         struct sk_buff  *skblist_next;
106         struct sk_buff  *skblist_prev;
107         sfq_index       qlen; /* number of skbs in skblist */
108         sfq_index       next; /* next slot in sfq RR chain */
109         struct sfq_head dep; /* anchor in dep[] chains */
110         unsigned short  hash; /* hash value (index in ht[]) */
111         short           allot; /* credit for this slot */
112 
113         unsigned int    backlog;
114         struct red_vars vars;
115 };
116 
117 struct sfq_sched_data {
118 /* frequently used fields */
119         int             limit;          /* limit of total number of packets in this qdisc */
120         unsigned int    divisor;        /* number of slots in hash table */
121         u8              headdrop;
122         u8              maxdepth;       /* limit of packets per flow */
123 
124         u32             perturbation;
125         u8              cur_depth;      /* depth of longest slot */
126         u8              flags;
127         unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
128         struct tcf_proto __rcu *filter_list;
129         sfq_index       *ht;            /* Hash table ('divisor' slots) */
130         struct sfq_slot *slots;         /* Flows table ('maxflows' entries) */
131 
132         struct red_parms *red_parms;
133         struct tc_sfqred_stats stats;
134         struct sfq_slot *tail;          /* current slot in round */
135 
136         struct sfq_head dep[SFQ_MAX_DEPTH + 1];
137                                         /* Linked lists of slots, indexed by depth
138                                          * dep[0] : list of unused flows
139                                          * dep[1] : list of flows with 1 packet
140                                          * dep[X] : list of flows with X packets
141                                          */
142 
143         unsigned int    maxflows;       /* number of flows in flows array */
144         int             perturb_period;
145         unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
146         struct timer_list perturb_timer;
147 };
148 
149 /*
150  * sfq_head are either in a sfq_slot or in dep[] array
151  */
152 static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
153 {
154         if (val < SFQ_MAX_FLOWS)
155                 return &q->slots[val].dep;
156         return &q->dep[val - SFQ_MAX_FLOWS];
157 }
158 
159 static unsigned int sfq_hash(const struct sfq_sched_data *q,
160                              const struct sk_buff *skb)
161 {
162         return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
163 }
164 
165 static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
166                                  int *qerr)
167 {
168         struct sfq_sched_data *q = qdisc_priv(sch);
169         struct tcf_result res;
170         struct tcf_proto *fl;
171         int result;
172 
173         if (TC_H_MAJ(skb->priority) == sch->handle &&
174             TC_H_MIN(skb->priority) > 0 &&
175             TC_H_MIN(skb->priority) <= q->divisor)
176                 return TC_H_MIN(skb->priority);
177 
178         fl = rcu_dereference_bh(q->filter_list);
179         if (!fl)
180                 return sfq_hash(q, skb) + 1;
181 
182         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
183         result = tc_classify(skb, fl, &res, false);
184         if (result >= 0) {
185 #ifdef CONFIG_NET_CLS_ACT
186                 switch (result) {
187                 case TC_ACT_STOLEN:
188                 case TC_ACT_QUEUED:
189                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
190                 case TC_ACT_SHOT:
191                         return 0;
192                 }
193 #endif
194                 if (TC_H_MIN(res.classid) <= q->divisor)
195                         return TC_H_MIN(res.classid);
196         }
197         return 0;
198 }
199 
200 /*
201  * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
202  */
203 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
204 {
205         sfq_index p, n;
206         struct sfq_slot *slot = &q->slots[x];
207         int qlen = slot->qlen;
208 
209         p = qlen + SFQ_MAX_FLOWS;
210         n = q->dep[qlen].next;
211 
212         slot->dep.next = n;
213         slot->dep.prev = p;
214 
215         q->dep[qlen].next = x;          /* sfq_dep_head(q, p)->next = x */
216         sfq_dep_head(q, n)->prev = x;
217 }
218 
219 #define sfq_unlink(q, x, n, p)                  \
220         do {                                    \
221                 n = q->slots[x].dep.next;       \
222                 p = q->slots[x].dep.prev;       \
223                 sfq_dep_head(q, p)->next = n;   \
224                 sfq_dep_head(q, n)->prev = p;   \
225         } while (0)
226 
227 
228 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
229 {
230         sfq_index p, n;
231         int d;
232 
233         sfq_unlink(q, x, n, p);
234 
235         d = q->slots[x].qlen--;
236         if (n == p && q->cur_depth == d)
237                 q->cur_depth--;
238         sfq_link(q, x);
239 }
240 
241 static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
242 {
243         sfq_index p, n;
244         int d;
245 
246         sfq_unlink(q, x, n, p);
247 
248         d = ++q->slots[x].qlen;
249         if (q->cur_depth < d)
250                 q->cur_depth = d;
251         sfq_link(q, x);
252 }
253 
254 /* helper functions : might be changed when/if skb use a standard list_head */
255 
256 /* remove one skb from tail of slot queue */
257 static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
258 {
259         struct sk_buff *skb = slot->skblist_prev;
260 
261         slot->skblist_prev = skb->prev;
262         skb->prev->next = (struct sk_buff *)slot;
263         skb->next = skb->prev = NULL;
264         return skb;
265 }
266 
267 /* remove one skb from head of slot queue */
268 static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
269 {
270         struct sk_buff *skb = slot->skblist_next;
271 
272         slot->skblist_next = skb->next;
273         skb->next->prev = (struct sk_buff *)slot;
274         skb->next = skb->prev = NULL;
275         return skb;
276 }
277 
278 static inline void slot_queue_init(struct sfq_slot *slot)
279 {
280         memset(slot, 0, sizeof(*slot));
281         slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
282 }
283 
284 /* add skb to slot queue (tail add) */
285 static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
286 {
287         skb->prev = slot->skblist_prev;
288         skb->next = (struct sk_buff *)slot;
289         slot->skblist_prev->next = skb;
290         slot->skblist_prev = skb;
291 }
292 
293 static unsigned int sfq_drop(struct Qdisc *sch)
294 {
295         struct sfq_sched_data *q = qdisc_priv(sch);
296         sfq_index x, d = q->cur_depth;
297         struct sk_buff *skb;
298         unsigned int len;
299         struct sfq_slot *slot;
300 
301         /* Queue is full! Find the longest slot and drop tail packet from it */
302         if (d > 1) {
303                 x = q->dep[d].next;
304                 slot = &q->slots[x];
305 drop:
306                 skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
307                 len = qdisc_pkt_len(skb);
308                 slot->backlog -= len;
309                 sfq_dec(q, x);
310                 sch->q.qlen--;
311                 qdisc_qstats_drop(sch);
312                 qdisc_qstats_backlog_dec(sch, skb);
313                 kfree_skb(skb);
314                 return len;
315         }
316 
317         if (d == 1) {
318                 /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
319                 x = q->tail->next;
320                 slot = &q->slots[x];
321                 q->tail->next = slot->next;
322                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
323                 goto drop;
324         }
325 
326         return 0;
327 }
328 
329 /* Is ECN parameter configured */
330 static int sfq_prob_mark(const struct sfq_sched_data *q)
331 {
332         return q->flags & TC_RED_ECN;
333 }
334 
335 /* Should packets over max threshold just be marked */
336 static int sfq_hard_mark(const struct sfq_sched_data *q)
337 {
338         return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
339 }
340 
341 static int sfq_headdrop(const struct sfq_sched_data *q)
342 {
343         return q->headdrop;
344 }
345 
346 static int
347 sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
348 {
349         struct sfq_sched_data *q = qdisc_priv(sch);
350         unsigned int hash, dropped;
351         sfq_index x, qlen;
352         struct sfq_slot *slot;
353         int uninitialized_var(ret);
354         struct sk_buff *head;
355         int delta;
356 
357         hash = sfq_classify(skb, sch, &ret);
358         if (hash == 0) {
359                 if (ret & __NET_XMIT_BYPASS)
360                         qdisc_qstats_drop(sch);
361                 kfree_skb(skb);
362                 return ret;
363         }
364         hash--;
365 
366         x = q->ht[hash];
367         slot = &q->slots[x];
368         if (x == SFQ_EMPTY_SLOT) {
369                 x = q->dep[0].next; /* get a free slot */
370                 if (x >= SFQ_MAX_FLOWS)
371                         return qdisc_drop(skb, sch, to_free);
372                 q->ht[hash] = x;
373                 slot = &q->slots[x];
374                 slot->hash = hash;
375                 slot->backlog = 0; /* should already be 0 anyway... */
376                 red_set_vars(&slot->vars);
377                 goto enqueue;
378         }
379         if (q->red_parms) {
380                 slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
381                                                         &slot->vars,
382                                                         slot->backlog);
383                 switch (red_action(q->red_parms,
384                                    &slot->vars,
385                                    slot->vars.qavg)) {
386                 case RED_DONT_MARK:
387                         break;
388 
389                 case RED_PROB_MARK:
390                         qdisc_qstats_overlimit(sch);
391                         if (sfq_prob_mark(q)) {
392                                 /* We know we have at least one packet in queue */
393                                 if (sfq_headdrop(q) &&
394                                     INET_ECN_set_ce(slot->skblist_next)) {
395                                         q->stats.prob_mark_head++;
396                                         break;
397                                 }
398                                 if (INET_ECN_set_ce(skb)) {
399                                         q->stats.prob_mark++;
400                                         break;
401                                 }
402                         }
403                         q->stats.prob_drop++;
404                         goto congestion_drop;
405 
406                 case RED_HARD_MARK:
407                         qdisc_qstats_overlimit(sch);
408                         if (sfq_hard_mark(q)) {
409                                 /* We know we have at least one packet in queue */
410                                 if (sfq_headdrop(q) &&
411                                     INET_ECN_set_ce(slot->skblist_next)) {
412                                         q->stats.forced_mark_head++;
413                                         break;
414                                 }
415                                 if (INET_ECN_set_ce(skb)) {
416                                         q->stats.forced_mark++;
417                                         break;
418                                 }
419                         }
420                         q->stats.forced_drop++;
421                         goto congestion_drop;
422                 }
423         }
424 
425         if (slot->qlen >= q->maxdepth) {
426 congestion_drop:
427                 if (!sfq_headdrop(q))
428                         return qdisc_drop(skb, sch, to_free);
429 
430                 /* We know we have at least one packet in queue */
431                 head = slot_dequeue_head(slot);
432                 delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
433                 sch->qstats.backlog -= delta;
434                 slot->backlog -= delta;
435                 qdisc_drop(head, sch, to_free);
436 
437                 slot_queue_add(slot, skb);
438                 return NET_XMIT_CN;
439         }
440 
441 enqueue:
442         qdisc_qstats_backlog_inc(sch, skb);
443         slot->backlog += qdisc_pkt_len(skb);
444         slot_queue_add(slot, skb);
445         sfq_inc(q, x);
446         if (slot->qlen == 1) {          /* The flow is new */
447                 if (q->tail == NULL) {  /* It is the first flow */
448                         slot->next = x;
449                 } else {
450                         slot->next = q->tail->next;
451                         q->tail->next = x;
452                 }
453                 /* We put this flow at the end of our flow list.
454                  * This might sound unfair for a new flow to wait after old ones,
455                  * but we could endup servicing new flows only, and freeze old ones.
456                  */
457                 q->tail = slot;
458                 /* We could use a bigger initial quantum for new flows */
459                 slot->allot = q->scaled_quantum;
460         }
461         if (++sch->q.qlen <= q->limit)
462                 return NET_XMIT_SUCCESS;
463 
464         qlen = slot->qlen;
465         dropped = sfq_drop(sch);
466         /* Return Congestion Notification only if we dropped a packet
467          * from this flow.
468          */
469         if (qlen != slot->qlen)
470                 return NET_XMIT_CN;
471 
472         /* As we dropped a packet, better let upper stack know this */
473         qdisc_tree_reduce_backlog(sch, 1, dropped);
474         return NET_XMIT_SUCCESS;
475 }
476 
477 static struct sk_buff *
478 sfq_dequeue(struct Qdisc *sch)
479 {
480         struct sfq_sched_data *q = qdisc_priv(sch);
481         struct sk_buff *skb;
482         sfq_index a, next_a;
483         struct sfq_slot *slot;
484 
485         /* No active slots */
486         if (q->tail == NULL)
487                 return NULL;
488 
489 next_slot:
490         a = q->tail->next;
491         slot = &q->slots[a];
492         if (slot->allot <= 0) {
493                 q->tail = slot;
494                 slot->allot += q->scaled_quantum;
495                 goto next_slot;
496         }
497         skb = slot_dequeue_head(slot);
498         sfq_dec(q, a);
499         qdisc_bstats_update(sch, skb);
500         sch->q.qlen--;
501         qdisc_qstats_backlog_dec(sch, skb);
502         slot->backlog -= qdisc_pkt_len(skb);
503         /* Is the slot empty? */
504         if (slot->qlen == 0) {
505                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
506                 next_a = slot->next;
507                 if (a == next_a) {
508                         q->tail = NULL; /* no more active slots */
509                         return skb;
510                 }
511                 q->tail->next = next_a;
512         } else {
513                 slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
514         }
515         return skb;
516 }
517 
518 static void
519 sfq_reset(struct Qdisc *sch)
520 {
521         struct sk_buff *skb;
522 
523         while ((skb = sfq_dequeue(sch)) != NULL)
524                 rtnl_kfree_skbs(skb, skb);
525 }
526 
527 /*
528  * When q->perturbation is changed, we rehash all queued skbs
529  * to avoid OOO (Out Of Order) effects.
530  * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
531  * counters.
532  */
533 static void sfq_rehash(struct Qdisc *sch)
534 {
535         struct sfq_sched_data *q = qdisc_priv(sch);
536         struct sk_buff *skb;
537         int i;
538         struct sfq_slot *slot;
539         struct sk_buff_head list;
540         int dropped = 0;
541         unsigned int drop_len = 0;
542 
543         __skb_queue_head_init(&list);
544 
545         for (i = 0; i < q->maxflows; i++) {
546                 slot = &q->slots[i];
547                 if (!slot->qlen)
548                         continue;
549                 while (slot->qlen) {
550                         skb = slot_dequeue_head(slot);
551                         sfq_dec(q, i);
552                         __skb_queue_tail(&list, skb);
553                 }
554                 slot->backlog = 0;
555                 red_set_vars(&slot->vars);
556                 q->ht[slot->hash] = SFQ_EMPTY_SLOT;
557         }
558         q->tail = NULL;
559 
560         while ((skb = __skb_dequeue(&list)) != NULL) {
561                 unsigned int hash = sfq_hash(q, skb);
562                 sfq_index x = q->ht[hash];
563 
564                 slot = &q->slots[x];
565                 if (x == SFQ_EMPTY_SLOT) {
566                         x = q->dep[0].next; /* get a free slot */
567                         if (x >= SFQ_MAX_FLOWS) {
568 drop:
569                                 qdisc_qstats_backlog_dec(sch, skb);
570                                 drop_len += qdisc_pkt_len(skb);
571                                 kfree_skb(skb);
572                                 dropped++;
573                                 continue;
574                         }
575                         q->ht[hash] = x;
576                         slot = &q->slots[x];
577                         slot->hash = hash;
578                 }
579                 if (slot->qlen >= q->maxdepth)
580                         goto drop;
581                 slot_queue_add(slot, skb);
582                 if (q->red_parms)
583                         slot->vars.qavg = red_calc_qavg(q->red_parms,
584                                                         &slot->vars,
585                                                         slot->backlog);
586                 slot->backlog += qdisc_pkt_len(skb);
587                 sfq_inc(q, x);
588                 if (slot->qlen == 1) {          /* The flow is new */
589                         if (q->tail == NULL) {  /* It is the first flow */
590                                 slot->next = x;
591                         } else {
592                                 slot->next = q->tail->next;
593                                 q->tail->next = x;
594                         }
595                         q->tail = slot;
596                         slot->allot = q->scaled_quantum;
597                 }
598         }
599         sch->q.qlen -= dropped;
600         qdisc_tree_reduce_backlog(sch, dropped, drop_len);
601 }
602 
603 static void sfq_perturbation(unsigned long arg)
604 {
605         struct Qdisc *sch = (struct Qdisc *)arg;
606         struct sfq_sched_data *q = qdisc_priv(sch);
607         spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
608 
609         spin_lock(root_lock);
610         q->perturbation = prandom_u32();
611         if (!q->filter_list && q->tail)
612                 sfq_rehash(sch);
613         spin_unlock(root_lock);
614 
615         if (q->perturb_period)
616                 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
617 }
618 
619 static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
620 {
621         struct sfq_sched_data *q = qdisc_priv(sch);
622         struct tc_sfq_qopt *ctl = nla_data(opt);
623         struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
624         unsigned int qlen, dropped = 0;
625         struct red_parms *p = NULL;
626 
627         if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
628                 return -EINVAL;
629         if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
630                 ctl_v1 = nla_data(opt);
631         if (ctl->divisor &&
632             (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
633                 return -EINVAL;
634         if (ctl_v1 && ctl_v1->qth_min) {
635                 p = kmalloc(sizeof(*p), GFP_KERNEL);
636                 if (!p)
637                         return -ENOMEM;
638         }
639         sch_tree_lock(sch);
640         if (ctl->quantum) {
641                 q->quantum = ctl->quantum;
642                 q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
643         }
644         q->perturb_period = ctl->perturb_period * HZ;
645         if (ctl->flows)
646                 q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
647         if (ctl->divisor) {
648                 q->divisor = ctl->divisor;
649                 q->maxflows = min_t(u32, q->maxflows, q->divisor);
650         }
651         if (ctl_v1) {
652                 if (ctl_v1->depth)
653                         q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
654                 if (p) {
655                         swap(q->red_parms, p);
656                         red_set_parms(q->red_parms,
657                                       ctl_v1->qth_min, ctl_v1->qth_max,
658                                       ctl_v1->Wlog,
659                                       ctl_v1->Plog, ctl_v1->Scell_log,
660                                       NULL,
661                                       ctl_v1->max_P);
662                 }
663                 q->flags = ctl_v1->flags;
664                 q->headdrop = ctl_v1->headdrop;
665         }
666         if (ctl->limit) {
667                 q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
668                 q->maxflows = min_t(u32, q->maxflows, q->limit);
669         }
670 
671         qlen = sch->q.qlen;
672         while (sch->q.qlen > q->limit)
673                 dropped += sfq_drop(sch);
674         qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
675 
676         del_timer(&q->perturb_timer);
677         if (q->perturb_period) {
678                 mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
679                 q->perturbation = prandom_u32();
680         }
681         sch_tree_unlock(sch);
682         kfree(p);
683         return 0;
684 }
685 
686 static void *sfq_alloc(size_t sz)
687 {
688         void *ptr = kmalloc(sz, GFP_KERNEL | __GFP_NOWARN);
689 
690         if (!ptr)
691                 ptr = vmalloc(sz);
692         return ptr;
693 }
694 
695 static void sfq_free(void *addr)
696 {
697         kvfree(addr);
698 }
699 
700 static void sfq_destroy(struct Qdisc *sch)
701 {
702         struct sfq_sched_data *q = qdisc_priv(sch);
703 
704         tcf_destroy_chain(&q->filter_list);
705         q->perturb_period = 0;
706         del_timer_sync(&q->perturb_timer);
707         sfq_free(q->ht);
708         sfq_free(q->slots);
709         kfree(q->red_parms);
710 }
711 
712 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
713 {
714         struct sfq_sched_data *q = qdisc_priv(sch);
715         int i;
716 
717         q->perturb_timer.function = sfq_perturbation;
718         q->perturb_timer.data = (unsigned long)sch;
719         init_timer_deferrable(&q->perturb_timer);
720 
721         for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
722                 q->dep[i].next = i + SFQ_MAX_FLOWS;
723                 q->dep[i].prev = i + SFQ_MAX_FLOWS;
724         }
725 
726         q->limit = SFQ_MAX_DEPTH;
727         q->maxdepth = SFQ_MAX_DEPTH;
728         q->cur_depth = 0;
729         q->tail = NULL;
730         q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
731         q->maxflows = SFQ_DEFAULT_FLOWS;
732         q->quantum = psched_mtu(qdisc_dev(sch));
733         q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
734         q->perturb_period = 0;
735         q->perturbation = prandom_u32();
736 
737         if (opt) {
738                 int err = sfq_change(sch, opt);
739                 if (err)
740                         return err;
741         }
742 
743         q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
744         q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
745         if (!q->ht || !q->slots) {
746                 /* Note: sfq_destroy() will be called by our caller */
747                 return -ENOMEM;
748         }
749 
750         for (i = 0; i < q->divisor; i++)
751                 q->ht[i] = SFQ_EMPTY_SLOT;
752 
753         for (i = 0; i < q->maxflows; i++) {
754                 slot_queue_init(&q->slots[i]);
755                 sfq_link(q, i);
756         }
757         if (q->limit >= 1)
758                 sch->flags |= TCQ_F_CAN_BYPASS;
759         else
760                 sch->flags &= ~TCQ_F_CAN_BYPASS;
761         return 0;
762 }
763 
764 static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
765 {
766         struct sfq_sched_data *q = qdisc_priv(sch);
767         unsigned char *b = skb_tail_pointer(skb);
768         struct tc_sfq_qopt_v1 opt;
769         struct red_parms *p = q->red_parms;
770 
771         memset(&opt, 0, sizeof(opt));
772         opt.v0.quantum  = q->quantum;
773         opt.v0.perturb_period = q->perturb_period / HZ;
774         opt.v0.limit    = q->limit;
775         opt.v0.divisor  = q->divisor;
776         opt.v0.flows    = q->maxflows;
777         opt.depth       = q->maxdepth;
778         opt.headdrop    = q->headdrop;
779 
780         if (p) {
781                 opt.qth_min     = p->qth_min >> p->Wlog;
782                 opt.qth_max     = p->qth_max >> p->Wlog;
783                 opt.Wlog        = p->Wlog;
784                 opt.Plog        = p->Plog;
785                 opt.Scell_log   = p->Scell_log;
786                 opt.max_P       = p->max_P;
787         }
788         memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
789         opt.flags       = q->flags;
790 
791         if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
792                 goto nla_put_failure;
793 
794         return skb->len;
795 
796 nla_put_failure:
797         nlmsg_trim(skb, b);
798         return -1;
799 }
800 
801 static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
802 {
803         return NULL;
804 }
805 
806 static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
807 {
808         return 0;
809 }
810 
811 static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
812                               u32 classid)
813 {
814         /* we cannot bypass queue discipline anymore */
815         sch->flags &= ~TCQ_F_CAN_BYPASS;
816         return 0;
817 }
818 
819 static void sfq_put(struct Qdisc *q, unsigned long cl)
820 {
821 }
822 
823 static struct tcf_proto __rcu **sfq_find_tcf(struct Qdisc *sch,
824                                              unsigned long cl)
825 {
826         struct sfq_sched_data *q = qdisc_priv(sch);
827 
828         if (cl)
829                 return NULL;
830         return &q->filter_list;
831 }
832 
833 static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
834                           struct sk_buff *skb, struct tcmsg *tcm)
835 {
836         tcm->tcm_handle |= TC_H_MIN(cl);
837         return 0;
838 }
839 
840 static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
841                                 struct gnet_dump *d)
842 {
843         struct sfq_sched_data *q = qdisc_priv(sch);
844         sfq_index idx = q->ht[cl - 1];
845         struct gnet_stats_queue qs = { 0 };
846         struct tc_sfq_xstats xstats = { 0 };
847 
848         if (idx != SFQ_EMPTY_SLOT) {
849                 const struct sfq_slot *slot = &q->slots[idx];
850 
851                 xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
852                 qs.qlen = slot->qlen;
853                 qs.backlog = slot->backlog;
854         }
855         if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
856                 return -1;
857         return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
858 }
859 
860 static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
861 {
862         struct sfq_sched_data *q = qdisc_priv(sch);
863         unsigned int i;
864 
865         if (arg->stop)
866                 return;
867 
868         for (i = 0; i < q->divisor; i++) {
869                 if (q->ht[i] == SFQ_EMPTY_SLOT ||
870                     arg->count < arg->skip) {
871                         arg->count++;
872                         continue;
873                 }
874                 if (arg->fn(sch, i + 1, arg) < 0) {
875                         arg->stop = 1;
876                         break;
877                 }
878                 arg->count++;
879         }
880 }
881 
882 static const struct Qdisc_class_ops sfq_class_ops = {
883         .leaf           =       sfq_leaf,
884         .get            =       sfq_get,
885         .put            =       sfq_put,
886         .tcf_chain      =       sfq_find_tcf,
887         .bind_tcf       =       sfq_bind,
888         .unbind_tcf     =       sfq_put,
889         .dump           =       sfq_dump_class,
890         .dump_stats     =       sfq_dump_class_stats,
891         .walk           =       sfq_walk,
892 };
893 
894 static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
895         .cl_ops         =       &sfq_class_ops,
896         .id             =       "sfq",
897         .priv_size      =       sizeof(struct sfq_sched_data),
898         .enqueue        =       sfq_enqueue,
899         .dequeue        =       sfq_dequeue,
900         .peek           =       qdisc_peek_dequeued,
901         .init           =       sfq_init,
902         .reset          =       sfq_reset,
903         .destroy        =       sfq_destroy,
904         .change         =       NULL,
905         .dump           =       sfq_dump,
906         .owner          =       THIS_MODULE,
907 };
908 
909 static int __init sfq_module_init(void)
910 {
911         return register_qdisc(&sfq_qdisc_ops);
912 }
913 static void __exit sfq_module_exit(void)
914 {
915         unregister_qdisc(&sfq_qdisc_ops);
916 }
917 module_init(sfq_module_init)
918 module_exit(sfq_module_exit)
919 MODULE_LICENSE("GPL");
920 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp