TOMOYO Linux Cross Reference
Linux/net/sched/sch_sfq.c

/*
 * net/sched/sch_sfq.c  Stochastic Fairness Queueing discipline.
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/pkt_cls.h>
#include <net/red.h>


/*      Stochastic Fairness Queuing algorithm.
        =======================================

        Source:
        Paul E. McKenney "Stochastic Fairness Queuing",
        IEEE INFOCOM '90 Proceedings, San Francisco, 1990.

        Paul E. McKenney "Stochastic Fairness Queuing",
        "Internetworking: Research and Experience", v.2, 1991, p.113-131.


        See also:
        M. Shreedhar and George Varghese "Efficient Fair
        Queuing using Deficit Round Robin", Proc. SIGCOMM 95.


        This is not the thing that is usually called (W)FQ nowadays.
        It does not use any timestamp mechanism, but instead
        processes queues in round-robin order.

        ADVANTAGE:

        - It is very cheap. Both CPU and memory requirements are minimal.

        DRAWBACKS:

        - "Stochastic" -> It is not 100% fair.
        When hash collisions occur, several flows are considered as one.

        - "Round-robin" -> It introduces larger delays than virtual clock
        based schemes, and should not be used for isolating interactive
        traffic from non-interactive traffic. This means that this
        scheduler should be used as a leaf of CBQ or P3, either of which
        puts interactive traffic into a higher priority band.

        We still need true WFQ for the top level CSZ, but using WFQ
        for best effort traffic is absolutely pointless:
        SFQ is superior for this purpose.

        IMPLEMENTATION:
        This implementation limits:
        - the maximal queue length per flow to 127 packets,
        - the max mtu to 2^18-1,
        - the max number of flows to 65408,
        - the number of hash buckets to 65536.

        It is easy to increase these values, but not in flight.  */

#define SFQ_MAX_DEPTH           127 /* max number of packets per flow */
#define SFQ_DEFAULT_FLOWS       128
#define SFQ_MAX_FLOWS           (0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
#define SFQ_EMPTY_SLOT          0xffff
#define SFQ_DEFAULT_HASH_DIVISOR 1024

/* We use 16 bits to store allot, and want to handle packets up to 64K.
 * Scale allot by 8 (1<<3) so that no overflow occurs.
 */
#define SFQ_ALLOT_SHIFT         3
#define SFQ_ALLOT_SIZE(X)       DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
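
/* Example: a full-size 1514 byte Ethernet frame scales to
 * SFQ_ALLOT_SIZE(1514) = DIV_ROUND_UP(1514, 8) = 190 units, and the
 * largest handled packet (64K) scales to 8192, which fits comfortably
 * in the 16-bit signed 'allot' field.
 */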

/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
typedef u16 sfq_index;

/*
 * We don't use pointers to save space.
 * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' into the slots[]
 * array, while the following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS +
 * SFQ_MAX_DEPTH] are 'pointers' into the dep[] array.
 */
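/* Example: index 5 denotes q->slots[5], while index SFQ_MAX_FLOWS + 3
 * denotes q->dep[3], the anchor of the list of flows currently holding
 * exactly three packets (see the dep[] comment in sfq_sched_data).
 */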
struct sfq_head {
        sfq_index       next;
        sfq_index       prev;
};

struct sfq_slot {
        struct sk_buff  *skblist_next;
        struct sk_buff  *skblist_prev;
        sfq_index       qlen; /* number of skbs in skblist */
        sfq_index       next; /* next slot in sfq RR chain */
        struct sfq_head dep; /* anchor in dep[] chains */
        unsigned short  hash; /* hash value (index in ht[]) */
        short           allot; /* credit for this slot */

        unsigned int    backlog;
        struct red_vars vars;
};

struct sfq_sched_data {
/* frequently used fields */
        int             limit;          /* limit of total number of packets in this qdisc */
        unsigned int    divisor;        /* number of slots in hash table */
        u8              headdrop;
        u8              maxdepth;       /* limit of packets per flow */

        u32             perturbation;
        u8              cur_depth;      /* depth of longest slot */
        u8              flags;
        unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
        struct tcf_proto __rcu *filter_list;
        struct tcf_block *block;
        sfq_index       *ht;            /* Hash table ('divisor' slots) */
        struct sfq_slot *slots;         /* Flows table ('maxflows' entries) */

        struct red_parms *red_parms;
        struct tc_sfqred_stats stats;
        struct sfq_slot *tail;          /* current slot in round */

        struct sfq_head dep[SFQ_MAX_DEPTH + 1];
                                        /* Linked lists of slots, indexed by depth
                                         * dep[0] : list of unused flows
                                         * dep[1] : list of flows with 1 packet
                                         * dep[X] : list of flows with X packets
                                         */

        unsigned int    maxflows;       /* number of flows in flows array */
        int             perturb_period;
        unsigned int    quantum;        /* Allotment per round: MUST BE >= MTU */
        struct timer_list perturb_timer;
};

/*
 * sfq_head are either in a sfq_slot or in dep[] array
 */
static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
{
        if (val < SFQ_MAX_FLOWS)
                return &q->slots[val].dep;
        return &q->dep[val - SFQ_MAX_FLOWS];
}

static unsigned int sfq_hash(const struct sfq_sched_data *q,
                             const struct sk_buff *skb)
{
        return skb_get_hash_perturb(skb, q->perturbation) & (q->divisor - 1);
}
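
/* Note: sfq_change() only accepts power-of-two divisors, so the mask
 * above selects a bucket uniformly.  Because the hash is salted with
 * q->perturbation, changing that value remaps every flow to a new
 * bucket; sfq_rehash() below moves already-queued packets accordingly.
 */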

static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
                                 int *qerr)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tcf_result res;
        struct tcf_proto *fl;
        int result;

        if (TC_H_MAJ(skb->priority) == sch->handle &&
            TC_H_MIN(skb->priority) > 0 &&
            TC_H_MIN(skb->priority) <= q->divisor)
                return TC_H_MIN(skb->priority);

        fl = rcu_dereference_bh(q->filter_list);
        if (!fl)
                return sfq_hash(q, skb) + 1;

        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
        result = tcf_classify(skb, fl, &res, false);
        if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_STOLEN:
                case TC_ACT_QUEUED:
                case TC_ACT_TRAP:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
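                        /* fall through */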
                case TC_ACT_SHOT:
                        return 0;
                }
#endif
                if (TC_H_MIN(res.classid) <= q->divisor)
                        return TC_H_MIN(res.classid);
        }
        return 0;
}

/*
 * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
 */
static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        struct sfq_slot *slot = &q->slots[x];
        int qlen = slot->qlen;

        p = qlen + SFQ_MAX_FLOWS;
        n = q->dep[qlen].next;

        slot->dep.next = n;
        slot->dep.prev = p;

        q->dep[qlen].next = x;          /* sfq_dep_head(q, p)->next = x */
        sfq_dep_head(q, n)->prev = x;
}

#define sfq_unlink(q, x, n, p)                  \
        do {                                    \
                n = q->slots[x].dep.next;       \
                p = q->slots[x].dep.prev;       \
                sfq_dep_head(q, p)->next = n;   \
                sfq_dep_head(q, n)->prev = p;   \
        } while (0)
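
/* sfq_link() pushes slot x at the head of the dep[qlen] list, between
 * the anchor and the previous head, while sfq_unlink() splices it back
 * out.  Together they let sfq_inc()/sfq_dec() move a slot between
 * depth lists in O(1) when its queue length changes by one.
 */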

static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = q->slots[x].qlen--;
        if (n == p && q->cur_depth == d)
                q->cur_depth--;
        sfq_link(q, x);
}

static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
{
        sfq_index p, n;
        int d;

        sfq_unlink(q, x, n, p);

        d = ++q->slots[x].qlen;
        if (q->cur_depth < d)
                q->cur_depth = d;
        sfq_link(q, x);
}

/* helper functions: might be changed when/if skb uses a standard list_head */
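/* These helpers treat the slot itself as the list head sentinel: the
 * first/last skb's prev/next pointers store (struct sk_buff *)slot, so
 * the list is circular and an empty slot points back at itself (see
 * slot_queue_init() below).
 */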

/* remove one skb from tail of slot queue */
static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_prev;

        slot->skblist_prev = skb->prev;
        skb->prev->next = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

/* remove one skb from head of slot queue */
static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
{
        struct sk_buff *skb = slot->skblist_next;

        slot->skblist_next = skb->next;
        skb->next->prev = (struct sk_buff *)slot;
        skb->next = skb->prev = NULL;
        return skb;
}

static inline void slot_queue_init(struct sfq_slot *slot)
{
        memset(slot, 0, sizeof(*slot));
        slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
}

/* add skb to slot queue (tail add) */
static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
{
        skb->prev = slot->skblist_prev;
        skb->next = (struct sk_buff *)slot;
        slot->skblist_prev->next = skb;
        slot->skblist_prev = skb;
}

static unsigned int sfq_drop(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index x, d = q->cur_depth;
        struct sk_buff *skb;
        unsigned int len;
        struct sfq_slot *slot;

        /* Queue is full! Find the longest slot and drop tail packet from it */
        if (d > 1) {
                x = q->dep[d].next;
                slot = &q->slots[x];
drop:
                skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
                len = qdisc_pkt_len(skb);
                slot->backlog -= len;
                sfq_dec(q, x);
                sch->q.qlen--;
                qdisc_qstats_drop(sch);
                qdisc_qstats_backlog_dec(sch, skb);
                kfree_skb(skb);
                return len;
        }

        if (d == 1) {
                /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
                x = q->tail->next;
                slot = &q->slots[x];
                q->tail->next = slot->next;
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                goto drop;
        }

        return 0;
}
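
/* Since q->cur_depth tracks the longest queue, q->dep[d].next above is
 * always one of the currently longest flows; overflow losses are thus
 * biased toward the most backlogged flow instead of being spread over
 * all of them.
 */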

/* Is the ECN parameter configured? */
static int sfq_prob_mark(const struct sfq_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over the max threshold just be marked? */
static int sfq_hard_mark(const struct sfq_sched_data *q)
{
        return (q->flags & (TC_RED_ECN | TC_RED_HARDDROP)) == TC_RED_ECN;
}

static int sfq_headdrop(const struct sfq_sched_data *q)
{
        return q->headdrop;
}

static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int hash, dropped;
        sfq_index x, qlen;
        struct sfq_slot *slot;
        int uninitialized_var(ret);
        struct sk_buff *head;
        int delta;

        hash = sfq_classify(skb, sch, &ret);
        if (hash == 0) {
                if (ret & __NET_XMIT_BYPASS)
                        qdisc_qstats_drop(sch);
                kfree_skb(skb);
                return ret;
        }
        hash--;

        x = q->ht[hash];
        slot = &q->slots[x];
        if (x == SFQ_EMPTY_SLOT) {
                x = q->dep[0].next; /* get a free slot */
                if (x >= SFQ_MAX_FLOWS)
                        return qdisc_drop(skb, sch, to_free);
                q->ht[hash] = x;
                slot = &q->slots[x];
                slot->hash = hash;
                slot->backlog = 0; /* should already be 0 anyway... */
                red_set_vars(&slot->vars);
                goto enqueue;
        }
        if (q->red_parms) {
                slot->vars.qavg = red_calc_qavg_no_idle_time(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                switch (red_action(q->red_parms,
                                   &slot->vars,
                                   slot->vars.qavg)) {
                case RED_DONT_MARK:
                        break;

                case RED_PROB_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_prob_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.prob_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.prob_mark++;
                                        break;
                                }
                        }
                        q->stats.prob_drop++;
                        goto congestion_drop;

                case RED_HARD_MARK:
                        qdisc_qstats_overlimit(sch);
                        if (sfq_hard_mark(q)) {
                                /* We know we have at least one packet in queue */
                                if (sfq_headdrop(q) &&
                                    INET_ECN_set_ce(slot->skblist_next)) {
                                        q->stats.forced_mark_head++;
                                        break;
                                }
                                if (INET_ECN_set_ce(skb)) {
                                        q->stats.forced_mark++;
                                        break;
                                }
                        }
                        q->stats.forced_drop++;
                        goto congestion_drop;
                }
        }

        if (slot->qlen >= q->maxdepth) {
congestion_drop:
                if (!sfq_headdrop(q))
                        return qdisc_drop(skb, sch, to_free);

                /* We know we have at least one packet in queue */
                head = slot_dequeue_head(slot);
                delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb);
                sch->qstats.backlog -= delta;
                slot->backlog -= delta;
                qdisc_drop(head, sch, to_free);

                slot_queue_add(slot, skb);
                qdisc_tree_reduce_backlog(sch, 0, delta);
                return NET_XMIT_CN;
        }

enqueue:
        qdisc_qstats_backlog_inc(sch, skb);
        slot->backlog += qdisc_pkt_len(skb);
        slot_queue_add(slot, skb);
        sfq_inc(q, x);
        if (slot->qlen == 1) {          /* The flow is new */
                if (q->tail == NULL) {  /* It is the first flow */
                        slot->next = x;
                } else {
                        slot->next = q->tail->next;
                        q->tail->next = x;
                }
                /* We put this flow at the end of our flow list.
                 * This might sound unfair for a new flow to wait after old ones,
                 * but we could end up servicing new flows only, and freezing
                 * old ones.
                 */
                q->tail = slot;
                /* We could use a bigger initial quantum for new flows */
                slot->allot = q->scaled_quantum;
        }
        if (++sch->q.qlen <= q->limit)
                return NET_XMIT_SUCCESS;

        qlen = slot->qlen;
        dropped = sfq_drop(sch);
        /* Return Congestion Notification only if we dropped a packet
         * from this flow.
         */
        if (qlen != slot->qlen) {
                qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
                return NET_XMIT_CN;
        }

        /* As we dropped a packet, better let upper stack know this */
        qdisc_tree_reduce_backlog(sch, 1, dropped);
        return NET_XMIT_SUCCESS;
}
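
/* Note: the limit check above runs after the packet was queued, so the
 * sfq_drop() call may evict a packet of the very flow being served;
 * comparing slot->qlen before and after the drop is what selects
 * between NET_XMIT_CN and NET_XMIT_SUCCESS.
 */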

static struct sk_buff *
sfq_dequeue(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        sfq_index a, next_a;
        struct sfq_slot *slot;

        /* No active slots */
        if (q->tail == NULL)
                return NULL;

next_slot:
        a = q->tail->next;
        slot = &q->slots[a];
        if (slot->allot <= 0) {
                q->tail = slot;
                slot->allot += q->scaled_quantum;
                goto next_slot;
        }
        skb = slot_dequeue_head(slot);
        sfq_dec(q, a);
        qdisc_bstats_update(sch, skb);
        sch->q.qlen--;
        qdisc_qstats_backlog_dec(sch, skb);
        slot->backlog -= qdisc_pkt_len(skb);
        /* Is the slot empty? */
        if (slot->qlen == 0) {
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
                next_a = slot->next;
                if (a == next_a) {
                        q->tail = NULL; /* no more active slots */
                        return skb;
                }
                q->tail->next = next_a;
        } else {
                slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
        }
        return skb;
}
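
/* The allot bookkeeping above is the deficit round-robin scheme from
 * the Shreedhar/Varghese paper cited at the top of this file: each
 * dequeued packet costs SFQ_ALLOT_SIZE() scaled units, and a slot
 * whose credit is exhausted is skipped and replenished by
 * scaled_quantum on the next round.
 */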

static void
sfq_reset(struct Qdisc *sch)
{
        struct sk_buff *skb;

        while ((skb = sfq_dequeue(sch)) != NULL)
                rtnl_kfree_skbs(skb, skb);
}

/*
 * When q->perturbation is changed, we rehash all queued skbs
 * to avoid OOO (Out Of Order) effects.
 * We don't use sfq_dequeue()/sfq_enqueue() because we don't want to
 * change counters.
 */
static void sfq_rehash(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;
        int i;
        struct sfq_slot *slot;
        struct sk_buff_head list;
        int dropped = 0;
        unsigned int drop_len = 0;

        __skb_queue_head_init(&list);

        for (i = 0; i < q->maxflows; i++) {
                slot = &q->slots[i];
                if (!slot->qlen)
                        continue;
                while (slot->qlen) {
                        skb = slot_dequeue_head(slot);
                        sfq_dec(q, i);
                        __skb_queue_tail(&list, skb);
                }
                slot->backlog = 0;
                red_set_vars(&slot->vars);
                q->ht[slot->hash] = SFQ_EMPTY_SLOT;
        }
        q->tail = NULL;

        while ((skb = __skb_dequeue(&list)) != NULL) {
                unsigned int hash = sfq_hash(q, skb);
                sfq_index x = q->ht[hash];

                slot = &q->slots[x];
                if (x == SFQ_EMPTY_SLOT) {
                        x = q->dep[0].next; /* get a free slot */
                        if (x >= SFQ_MAX_FLOWS) {
drop:
                                qdisc_qstats_backlog_dec(sch, skb);
                                drop_len += qdisc_pkt_len(skb);
                                kfree_skb(skb);
                                dropped++;
                                continue;
                        }
                        q->ht[hash] = x;
                        slot = &q->slots[x];
                        slot->hash = hash;
                }
                if (slot->qlen >= q->maxdepth)
                        goto drop;
                slot_queue_add(slot, skb);
                if (q->red_parms)
                        slot->vars.qavg = red_calc_qavg(q->red_parms,
                                                        &slot->vars,
                                                        slot->backlog);
                slot->backlog += qdisc_pkt_len(skb);
                sfq_inc(q, x);
                if (slot->qlen == 1) {          /* The flow is new */
                        if (q->tail == NULL) {  /* It is the first flow */
                                slot->next = x;
                        } else {
                                slot->next = q->tail->next;
                                q->tail->next = x;
                        }
                        q->tail = slot;
                        slot->allot = q->scaled_quantum;
                }
        }
        sch->q.qlen -= dropped;
        qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}

static void sfq_perturbation(unsigned long arg)
{
        struct Qdisc *sch = (struct Qdisc *)arg;
        struct sfq_sched_data *q = qdisc_priv(sch);
        spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));

        spin_lock(root_lock);
        q->perturbation = prandom_u32();
        if (!q->filter_list && q->tail)
                sfq_rehash(sch);
        spin_unlock(root_lock);

        if (q->perturb_period)
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
}

static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        struct tc_sfq_qopt *ctl = nla_data(opt);
        struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
        unsigned int qlen, dropped = 0;
        struct red_parms *p = NULL;

        if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
                return -EINVAL;
        if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
                ctl_v1 = nla_data(opt);
        if (ctl->divisor &&
            (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
                return -EINVAL;
        if (ctl_v1 && ctl_v1->qth_min) {
                p = kmalloc(sizeof(*p), GFP_KERNEL);
                if (!p)
                        return -ENOMEM;
        }
        sch_tree_lock(sch);
        if (ctl->quantum) {
                q->quantum = ctl->quantum;
                q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        }
        q->perturb_period = ctl->perturb_period * HZ;
        if (ctl->flows)
                q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
        if (ctl->divisor) {
                q->divisor = ctl->divisor;
                q->maxflows = min_t(u32, q->maxflows, q->divisor);
        }
        if (ctl_v1) {
                if (ctl_v1->depth)
                        q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
                if (p) {
                        swap(q->red_parms, p);
                        red_set_parms(q->red_parms,
                                      ctl_v1->qth_min, ctl_v1->qth_max,
                                      ctl_v1->Wlog,
                                      ctl_v1->Plog, ctl_v1->Scell_log,
                                      NULL,
                                      ctl_v1->max_P);
                }
                q->flags = ctl_v1->flags;
                q->headdrop = ctl_v1->headdrop;
        }
        if (ctl->limit) {
                q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
                q->maxflows = min_t(u32, q->maxflows, q->limit);
        }

        qlen = sch->q.qlen;
        while (sch->q.qlen > q->limit)
                dropped += sfq_drop(sch);
        qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);

        del_timer(&q->perturb_timer);
        if (q->perturb_period) {
                mod_timer(&q->perturb_timer, jiffies + q->perturb_period);
                q->perturbation = prandom_u32();
        }
        sch_tree_unlock(sch);
        kfree(p);
        return 0;
}
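
/* For illustration, the fields handled above correspond to the tc(8)
 * options for sfq; e.g. (device name hypothetical):
 *
 *   tc qdisc add dev eth0 root sfq limit 64 perturb 10 quantum 1514
 *
 * sets q->limit, q->perturb_period (given in seconds, scaled by HZ
 * here) and q->quantum respectively.
 */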

static void *sfq_alloc(size_t sz)
{
        return kvmalloc(sz, GFP_KERNEL);
}

static void sfq_free(void *addr)
{
        kvfree(addr);
}

static void sfq_destroy(struct Qdisc *sch)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        tcf_block_put(q->block);
        q->perturb_period = 0;
        del_timer_sync(&q->perturb_timer);
        sfq_free(q->ht);
        sfq_free(q->slots);
        kfree(q->red_parms);
}

static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        int i;
        int err;

        setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
                               (unsigned long)sch);

        err = tcf_block_get(&q->block, &q->filter_list);
        if (err)
                return err;

        for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
                q->dep[i].next = i + SFQ_MAX_FLOWS;
                q->dep[i].prev = i + SFQ_MAX_FLOWS;
        }

        q->limit = SFQ_MAX_DEPTH;
        q->maxdepth = SFQ_MAX_DEPTH;
        q->cur_depth = 0;
        q->tail = NULL;
        q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
        q->maxflows = SFQ_DEFAULT_FLOWS;
        q->quantum = psched_mtu(qdisc_dev(sch));
        q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
        q->perturb_period = 0;
        q->perturbation = prandom_u32();

        if (opt) {
                err = sfq_change(sch, opt);
                if (err)
                        return err;
        }

        q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
        q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
        if (!q->ht || !q->slots) {
                /* Note: sfq_destroy() will be called by our caller */
                return -ENOMEM;
        }

        for (i = 0; i < q->divisor; i++)
                q->ht[i] = SFQ_EMPTY_SLOT;

        for (i = 0; i < q->maxflows; i++) {
                slot_queue_init(&q->slots[i]);
                sfq_link(q, i);
        }
        if (q->limit >= 1)
                sch->flags |= TCQ_F_CAN_BYPASS;
        else
                sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
}
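
/* Defaults established above when no options are supplied: limit and
 * per-flow depth of SFQ_MAX_DEPTH (127) packets, SFQ_DEFAULT_FLOWS
 * (128) flows, a 1024-bucket hash table, quantum equal to the device
 * MTU, and no perturbation timer armed.
 */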

static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned char *b = skb_tail_pointer(skb);
        struct tc_sfq_qopt_v1 opt;
        struct red_parms *p = q->red_parms;

        memset(&opt, 0, sizeof(opt));
        opt.v0.quantum  = q->quantum;
        opt.v0.perturb_period = q->perturb_period / HZ;
        opt.v0.limit    = q->limit;
        opt.v0.divisor  = q->divisor;
        opt.v0.flows    = q->maxflows;
        opt.depth       = q->maxdepth;
        opt.headdrop    = q->headdrop;

        if (p) {
                opt.qth_min     = p->qth_min >> p->Wlog;
                opt.qth_max     = p->qth_max >> p->Wlog;
                opt.Wlog        = p->Wlog;
                opt.Plog        = p->Plog;
                opt.Scell_log   = p->Scell_log;
                opt.max_P       = p->max_P;
        }
        memcpy(&opt.stats, &q->stats, sizeof(opt.stats));
        opt.flags       = q->flags;

        if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
                goto nla_put_failure;

        return skb->len;

nla_put_failure:
        nlmsg_trim(skb, b);
        return -1;
}

static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg)
{
        return NULL;
}

static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
{
        return 0;
}

static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent,
                              u32 classid)
{
        /* we cannot bypass queue discipline anymore */
        sch->flags &= ~TCQ_F_CAN_BYPASS;
        return 0;
}

static void sfq_put(struct Qdisc *q, unsigned long cl)
{
}

static struct tcf_block *sfq_tcf_block(struct Qdisc *sch, unsigned long cl)
{
        struct sfq_sched_data *q = qdisc_priv(sch);

        if (cl)
                return NULL;
        return q->block;
}

static int sfq_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        tcm->tcm_handle |= TC_H_MIN(cl);
        return 0;
}

static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
                                struct gnet_dump *d)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        sfq_index idx = q->ht[cl - 1];
        struct gnet_stats_queue qs = { 0 };
        struct tc_sfq_xstats xstats = { 0 };

        if (idx != SFQ_EMPTY_SLOT) {
                const struct sfq_slot *slot = &q->slots[idx];

                xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
                qs.qlen = slot->qlen;
                qs.backlog = slot->backlog;
        }
        if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
                return -1;
        return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
}

static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        struct sfq_sched_data *q = qdisc_priv(sch);
        unsigned int i;

        if (arg->stop)
                return;

        for (i = 0; i < q->divisor; i++) {
                if (q->ht[i] == SFQ_EMPTY_SLOT ||
                    arg->count < arg->skip) {
                        arg->count++;
                        continue;
                }
                if (arg->fn(sch, i + 1, arg) < 0) {
                        arg->stop = 1;
                        break;
                }
                arg->count++;
        }
}
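
/* SFQ exposes one pseudo-class per hash bucket: the walk above hands
 * out class ids of i + 1, matching the hash + 1 values returned by
 * sfq_classify(), which is why sfq_dump_class_stats() indexes
 * q->ht[cl - 1].
 */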

static const struct Qdisc_class_ops sfq_class_ops = {
        .leaf           =       sfq_leaf,
        .get            =       sfq_get,
        .put            =       sfq_put,
        .tcf_block      =       sfq_tcf_block,
        .bind_tcf       =       sfq_bind,
        .unbind_tcf     =       sfq_put,
        .dump           =       sfq_dump_class,
        .dump_stats     =       sfq_dump_class_stats,
        .walk           =       sfq_walk,
};

static struct Qdisc_ops sfq_qdisc_ops __read_mostly = {
        .cl_ops         =       &sfq_class_ops,
        .id             =       "sfq",
        .priv_size      =       sizeof(struct sfq_sched_data),
        .enqueue        =       sfq_enqueue,
        .dequeue        =       sfq_dequeue,
        .peek           =       qdisc_peek_dequeued,
        .init           =       sfq_init,
        .reset          =       sfq_reset,
        .destroy        =       sfq_destroy,
        .change         =       NULL,
        .dump           =       sfq_dump,
        .owner          =       THIS_MODULE,
};

static int __init sfq_module_init(void)
{
        return register_qdisc(&sfq_qdisc_ops);
}
static void __exit sfq_module_exit(void)
{
        unregister_qdisc(&sfq_qdisc_ops);
}
module_init(sfq_module_init)
module_exit(sfq_module_exit)
MODULE_LICENSE("GPL");
