~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/net/sched/sch_cake.c

Version: ~ [ linux-5.5-rc1 ] ~ [ linux-5.4.2 ] ~ [ linux-5.3.15 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.88 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.158 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.206 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.206 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.78 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
  2 
  3 /* COMMON Applications Kept Enhanced (CAKE) discipline
  4  *
  5  * Copyright (C) 2014-2018 Jonathan Morton <chromatix99@gmail.com>
  6  * Copyright (C) 2015-2018 Toke Høiland-Jørgensen <toke@toke.dk>
  7  * Copyright (C) 2014-2018 Dave Täht <dave.taht@gmail.com>
  8  * Copyright (C) 2015-2018 Sebastian Moeller <moeller0@gmx.de>
  9  * (C) 2015-2018 Kevin Darbyshire-Bryant <kevin@darbyshire-bryant.me.uk>
 10  * Copyright (C) 2017-2018 Ryan Mounce <ryan@mounce.com.au>
 11  *
 12  * The CAKE Principles:
 13  *                 (or, how to have your cake and eat it too)
 14  *
 15  * This is a combination of several shaping, AQM and FQ techniques into one
 16  * easy-to-use package:
 17  *
 18  * - An overall bandwidth shaper, to move the bottleneck away from dumb CPE
 19  *   equipment and bloated MACs.  This operates in deficit mode (as in sch_fq),
 20  *   eliminating the need for any sort of burst parameter (eg. token bucket
 21  *   depth).  Burst support is limited to that necessary to overcome scheduling
 22  *   latency.
 23  *
 24  * - A Diffserv-aware priority queue, giving more priority to certain classes,
 25  *   up to a specified fraction of bandwidth.  Above that bandwidth threshold,
 26  *   the priority is reduced to avoid starving other tins.
 27  *
 28  * - Each priority tin has a separate Flow Queue system, to isolate traffic
 29  *   flows from each other.  This prevents a burst on one flow from increasing
 30  *   the delay to another.  Flows are distributed to queues using a
 31  *   set-associative hash function.
 32  *
 33  * - Each queue is actively managed by Cobalt, which is a combination of the
 34  *   Codel and Blue AQM algorithms.  This serves flows fairly, and signals
 35  *   congestion early via ECN (if available) and/or packet drops, to keep
 36  *   latency low.  The codel parameters are auto-tuned based on the bandwidth
 37  *   setting, as is necessary at low bandwidths.
 38  *
 39  * The configuration parameters are kept deliberately simple for ease of use.
 40  * Everything has sane defaults.  Complete generality of configuration is *not*
 41  * a goal.
 42  *
 43  * The priority queue operates according to a weighted DRR scheme, combined with
 44  * a bandwidth tracker which reuses the shaper logic to detect which side of the
 45  * bandwidth sharing threshold the tin is operating.  This determines whether a
 46  * priority-based weight (high) or a bandwidth-based weight (low) is used for
 47  * that tin in the current pass.
 48  *
 49  * This qdisc was inspired by Eric Dumazet's fq_codel code, which he kindly
 50  * granted us permission to leverage.
 51  */
 52 
 53 #include <linux/module.h>
 54 #include <linux/types.h>
 55 #include <linux/kernel.h>
 56 #include <linux/jiffies.h>
 57 #include <linux/string.h>
 58 #include <linux/in.h>
 59 #include <linux/errno.h>
 60 #include <linux/init.h>
 61 #include <linux/skbuff.h>
 62 #include <linux/jhash.h>
 63 #include <linux/slab.h>
 64 #include <linux/vmalloc.h>
 65 #include <linux/reciprocal_div.h>
 66 #include <net/netlink.h>
 67 #include <linux/if_vlan.h>
 68 #include <net/pkt_sched.h>
 69 #include <net/pkt_cls.h>
 70 #include <net/tcp.h>
 71 #include <net/flow_dissector.h>
 72 
 73 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 74 #include <net/netfilter/nf_conntrack_core.h>
 75 #endif
 76 
 77 #define CAKE_SET_WAYS (8)
 78 #define CAKE_MAX_TINS (8)
 79 #define CAKE_QUEUES (1024)
 80 #define CAKE_FLOW_MASK 63
 81 #define CAKE_FLOW_NAT_FLAG 64
 82 
 83 /* struct cobalt_params - contains codel and blue parameters
 84  * @interval:   codel initial drop rate
 85  * @target:     maximum persistent sojourn time & blue update rate
 86  * @mtu_time:   serialisation delay of maximum-size packet
 87  * @p_inc:      increment of blue drop probability (0.32 fxp)
 88  * @p_dec:      decrement of blue drop probability (0.32 fxp)
 89  */
 90 struct cobalt_params {
 91         u64     interval;
 92         u64     target;
 93         u64     mtu_time;
 94         u32     p_inc;
 95         u32     p_dec;
 96 };
 97 
 98 /* struct cobalt_vars - contains codel and blue variables
 99  * @count:              codel dropping frequency
100  * @rec_inv_sqrt:       reciprocal value of sqrt(count) >> 1
101  * @drop_next:          time to drop next packet, or when we dropped last
102  * @blue_timer:         Blue time to next drop
103  * @p_drop:             BLUE drop probability (0.32 fxp)
104  * @dropping:           set if in dropping state
105  * @ecn_marked:         set if marked
106  */
107 struct cobalt_vars {
108         u32     count;
109         u32     rec_inv_sqrt;
110         ktime_t drop_next;
111         ktime_t blue_timer;
112         u32     p_drop;
113         bool    dropping;
114         bool    ecn_marked;
115 };
116 
117 enum {
118         CAKE_SET_NONE = 0,
119         CAKE_SET_SPARSE,
120         CAKE_SET_SPARSE_WAIT, /* counted in SPARSE, actually in BULK */
121         CAKE_SET_BULK,
122         CAKE_SET_DECAYING
123 };
124 
125 struct cake_flow {
126         /* this stuff is all needed per-flow at dequeue time */
127         struct sk_buff    *head;
128         struct sk_buff    *tail;
129         struct list_head  flowchain;
130         s32               deficit;
131         u32               dropped;
132         struct cobalt_vars cvars;
133         u16               srchost; /* index into cake_host table */
134         u16               dsthost;
135         u8                set;
136 }; /* please try to keep this structure <= 64 bytes */
137 
138 struct cake_host {
139         u32 srchost_tag;
140         u32 dsthost_tag;
141         u16 srchost_refcnt;
142         u16 dsthost_refcnt;
143 };
144 
145 struct cake_heap_entry {
146         u16 t:3, b:10;
147 };
148 
149 struct cake_tin_data {
150         struct cake_flow flows[CAKE_QUEUES];
151         u32     backlogs[CAKE_QUEUES];
152         u32     tags[CAKE_QUEUES]; /* for set association */
153         u16     overflow_idx[CAKE_QUEUES];
154         struct cake_host hosts[CAKE_QUEUES]; /* for triple isolation */
155         u16     flow_quantum;
156 
157         struct cobalt_params cparams;
158         u32     drop_overlimit;
159         u16     bulk_flow_count;
160         u16     sparse_flow_count;
161         u16     decaying_flow_count;
162         u16     unresponsive_flow_count;
163 
164         u32     max_skblen;
165 
166         struct list_head new_flows;
167         struct list_head old_flows;
168         struct list_head decaying_flows;
169 
170         /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
171         ktime_t time_next_packet;
172         u64     tin_rate_ns;
173         u64     tin_rate_bps;
174         u16     tin_rate_shft;
175 
176         u16     tin_quantum_prio;
177         u16     tin_quantum_band;
178         s32     tin_deficit;
179         u32     tin_backlog;
180         u32     tin_dropped;
181         u32     tin_ecn_mark;
182 
183         u32     packets;
184         u64     bytes;
185 
186         u32     ack_drops;
187 
188         /* moving averages */
189         u64 avge_delay;
190         u64 peak_delay;
191         u64 base_delay;
192 
193         /* hash function stats */
194         u32     way_directs;
195         u32     way_hits;
196         u32     way_misses;
197         u32     way_collisions;
198 }; /* number of tins is small, so size of this struct doesn't matter much */
199 
200 struct cake_sched_data {
201         struct tcf_proto __rcu *filter_list; /* optional external classifier */
202         struct tcf_block *block;
203         struct cake_tin_data *tins;
204 
205         struct cake_heap_entry overflow_heap[CAKE_QUEUES * CAKE_MAX_TINS];
206         u16             overflow_timeout;
207 
208         u16             tin_cnt;
209         u8              tin_mode;
210         u8              flow_mode;
211         u8              ack_filter;
212         u8              atm_mode;
213 
214         /* time_next = time_this + ((len * rate_ns) >> rate_shft) */
215         u16             rate_shft;
216         ktime_t         time_next_packet;
217         ktime_t         failsafe_next_packet;
218         u64             rate_ns;
219         u64             rate_bps;
220         u16             rate_flags;
221         s16             rate_overhead;
222         u16             rate_mpu;
223         u64             interval;
224         u64             target;
225 
226         /* resource tracking */
227         u32             buffer_used;
228         u32             buffer_max_used;
229         u32             buffer_limit;
230         u32             buffer_config_limit;
231 
232         /* indices for dequeue */
233         u16             cur_tin;
234         u16             cur_flow;
235 
236         struct qdisc_watchdog watchdog;
237         const u8        *tin_index;
238         const u8        *tin_order;
239 
240         /* bandwidth capacity estimate */
241         ktime_t         last_packet_time;
242         ktime_t         avg_window_begin;
243         u64             avg_packet_interval;
244         u64             avg_window_bytes;
245         u64             avg_peak_bandwidth;
246         ktime_t         last_reconfig_time;
247 
248         /* packet length stats */
249         u32             avg_netoff;
250         u16             max_netlen;
251         u16             max_adjlen;
252         u16             min_netlen;
253         u16             min_adjlen;
254 };
255 
256 enum {
257         CAKE_FLAG_OVERHEAD         = BIT(0),
258         CAKE_FLAG_AUTORATE_INGRESS = BIT(1),
259         CAKE_FLAG_INGRESS          = BIT(2),
260         CAKE_FLAG_WASH             = BIT(3),
261         CAKE_FLAG_SPLIT_GSO        = BIT(4)
262 };
263 
264 /* COBALT operates the Codel and BLUE algorithms in parallel, in order to
265  * obtain the best features of each.  Codel is excellent on flows which
266  * respond to congestion signals in a TCP-like way.  BLUE is more effective on
267  * unresponsive flows.
268  */
269 
270 struct cobalt_skb_cb {
271         ktime_t enqueue_time;
272         u32     adjusted_len;
273 };
274 
275 static u64 us_to_ns(u64 us)
276 {
277         return us * NSEC_PER_USEC;
278 }
279 
280 static struct cobalt_skb_cb *get_cobalt_cb(const struct sk_buff *skb)
281 {
282         qdisc_cb_private_validate(skb, sizeof(struct cobalt_skb_cb));
283         return (struct cobalt_skb_cb *)qdisc_skb_cb(skb)->data;
284 }
285 
286 static ktime_t cobalt_get_enqueue_time(const struct sk_buff *skb)
287 {
288         return get_cobalt_cb(skb)->enqueue_time;
289 }
290 
291 static void cobalt_set_enqueue_time(struct sk_buff *skb,
292                                     ktime_t now)
293 {
294         get_cobalt_cb(skb)->enqueue_time = now;
295 }
296 
297 static u16 quantum_div[CAKE_QUEUES + 1] = {0};
298 
299 /* Diffserv lookup tables */
300 
301 static const u8 precedence[] = {
302         0, 0, 0, 0, 0, 0, 0, 0,
303         1, 1, 1, 1, 1, 1, 1, 1,
304         2, 2, 2, 2, 2, 2, 2, 2,
305         3, 3, 3, 3, 3, 3, 3, 3,
306         4, 4, 4, 4, 4, 4, 4, 4,
307         5, 5, 5, 5, 5, 5, 5, 5,
308         6, 6, 6, 6, 6, 6, 6, 6,
309         7, 7, 7, 7, 7, 7, 7, 7,
310 };
311 
312 static const u8 diffserv8[] = {
313         2, 5, 1, 2, 4, 2, 2, 2,
314         0, 2, 1, 2, 1, 2, 1, 2,
315         5, 2, 4, 2, 4, 2, 4, 2,
316         3, 2, 3, 2, 3, 2, 3, 2,
317         6, 2, 3, 2, 3, 2, 3, 2,
318         6, 2, 2, 2, 6, 2, 6, 2,
319         7, 2, 2, 2, 2, 2, 2, 2,
320         7, 2, 2, 2, 2, 2, 2, 2,
321 };
322 
323 static const u8 diffserv4[] = {
324         0, 2, 0, 0, 2, 0, 0, 0,
325         1, 0, 0, 0, 0, 0, 0, 0,
326         2, 0, 2, 0, 2, 0, 2, 0,
327         2, 0, 2, 0, 2, 0, 2, 0,
328         3, 0, 2, 0, 2, 0, 2, 0,
329         3, 0, 0, 0, 3, 0, 3, 0,
330         3, 0, 0, 0, 0, 0, 0, 0,
331         3, 0, 0, 0, 0, 0, 0, 0,
332 };
333 
334 static const u8 diffserv3[] = {
335         0, 0, 0, 0, 2, 0, 0, 0,
336         1, 0, 0, 0, 0, 0, 0, 0,
337         0, 0, 0, 0, 0, 0, 0, 0,
338         0, 0, 0, 0, 0, 0, 0, 0,
339         0, 0, 0, 0, 0, 0, 0, 0,
340         0, 0, 0, 0, 2, 0, 2, 0,
341         2, 0, 0, 0, 0, 0, 0, 0,
342         2, 0, 0, 0, 0, 0, 0, 0,
343 };
344 
345 static const u8 besteffort[] = {
346         0, 0, 0, 0, 0, 0, 0, 0,
347         0, 0, 0, 0, 0, 0, 0, 0,
348         0, 0, 0, 0, 0, 0, 0, 0,
349         0, 0, 0, 0, 0, 0, 0, 0,
350         0, 0, 0, 0, 0, 0, 0, 0,
351         0, 0, 0, 0, 0, 0, 0, 0,
352         0, 0, 0, 0, 0, 0, 0, 0,
353         0, 0, 0, 0, 0, 0, 0, 0,
354 };
355 
356 /* tin priority order for stats dumping */
357 
358 static const u8 normal_order[] = {0, 1, 2, 3, 4, 5, 6, 7};
359 static const u8 bulk_order[] = {1, 0, 2, 3};
360 
361 #define REC_INV_SQRT_CACHE (16)
362 static u32 cobalt_rec_inv_sqrt_cache[REC_INV_SQRT_CACHE] = {0};
363 
364 /* http://en.wikipedia.org/wiki/Methods_of_computing_square_roots
365  * new_invsqrt = (invsqrt / 2) * (3 - count * invsqrt^2)
366  *
367  * Here, invsqrt is a fixed point number (< 1.0), 32bit mantissa, aka Q0.32
368  */
369 
370 static void cobalt_newton_step(struct cobalt_vars *vars)
371 {
372         u32 invsqrt, invsqrt2;
373         u64 val;
374 
375         invsqrt = vars->rec_inv_sqrt;
376         invsqrt2 = ((u64)invsqrt * invsqrt) >> 32;
377         val = (3LL << 32) - ((u64)vars->count * invsqrt2);
378 
379         val >>= 2; /* avoid overflow in following multiply */
380         val = (val * invsqrt) >> (32 - 2 + 1);
381 
382         vars->rec_inv_sqrt = val;
383 }
384 
385 static void cobalt_invsqrt(struct cobalt_vars *vars)
386 {
387         if (vars->count < REC_INV_SQRT_CACHE)
388                 vars->rec_inv_sqrt = cobalt_rec_inv_sqrt_cache[vars->count];
389         else
390                 cobalt_newton_step(vars);
391 }
392 
393 /* There is a big difference in timing between the accurate values placed in
394  * the cache and the approximations given by a single Newton step for small
395  * count values, particularly when stepping from count 1 to 2 or vice versa.
396  * Above 16, a single Newton step gives sufficient accuracy in either
397  * direction, given the precision stored.
398  *
399  * The magnitude of the error when stepping up to count 2 is such as to give
400  * the value that *should* have been produced at count 4.
401  */
402 
403 static void cobalt_cache_init(void)
404 {
405         struct cobalt_vars v;
406 
407         memset(&v, 0, sizeof(v));
408         v.rec_inv_sqrt = ~0U;
409         cobalt_rec_inv_sqrt_cache[0] = v.rec_inv_sqrt;
410 
411         for (v.count = 1; v.count < REC_INV_SQRT_CACHE; v.count++) {
412                 cobalt_newton_step(&v);
413                 cobalt_newton_step(&v);
414                 cobalt_newton_step(&v);
415                 cobalt_newton_step(&v);
416 
417                 cobalt_rec_inv_sqrt_cache[v.count] = v.rec_inv_sqrt;
418         }
419 }
420 
421 static void cobalt_vars_init(struct cobalt_vars *vars)
422 {
423         memset(vars, 0, sizeof(*vars));
424 
425         if (!cobalt_rec_inv_sqrt_cache[0]) {
426                 cobalt_cache_init();
427                 cobalt_rec_inv_sqrt_cache[0] = ~0;
428         }
429 }
430 
431 /* CoDel control_law is t + interval/sqrt(count)
432  * We maintain in rec_inv_sqrt the reciprocal value of sqrt(count) to avoid
433  * both sqrt() and divide operation.
434  */
435 static ktime_t cobalt_control(ktime_t t,
436                               u64 interval,
437                               u32 rec_inv_sqrt)
438 {
439         return ktime_add_ns(t, reciprocal_scale(interval,
440                                                 rec_inv_sqrt));
441 }
442 
443 /* Call this when a packet had to be dropped due to queue overflow.  Returns
444  * true if the BLUE state was quiescent before but active after this call.
445  */
446 static bool cobalt_queue_full(struct cobalt_vars *vars,
447                               struct cobalt_params *p,
448                               ktime_t now)
449 {
450         bool up = false;
451 
452         if (ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
453                 up = !vars->p_drop;
454                 vars->p_drop += p->p_inc;
455                 if (vars->p_drop < p->p_inc)
456                         vars->p_drop = ~0;
457                 vars->blue_timer = now;
458         }
459         vars->dropping = true;
460         vars->drop_next = now;
461         if (!vars->count)
462                 vars->count = 1;
463 
464         return up;
465 }
466 
467 /* Call this when the queue was serviced but turned out to be empty.  Returns
468  * true if the BLUE state was active before but quiescent after this call.
469  */
470 static bool cobalt_queue_empty(struct cobalt_vars *vars,
471                                struct cobalt_params *p,
472                                ktime_t now)
473 {
474         bool down = false;
475 
476         if (vars->p_drop &&
477             ktime_to_ns(ktime_sub(now, vars->blue_timer)) > p->target) {
478                 if (vars->p_drop < p->p_dec)
479                         vars->p_drop = 0;
480                 else
481                         vars->p_drop -= p->p_dec;
482                 vars->blue_timer = now;
483                 down = !vars->p_drop;
484         }
485         vars->dropping = false;
486 
487         if (vars->count && ktime_to_ns(ktime_sub(now, vars->drop_next)) >= 0) {
488                 vars->count--;
489                 cobalt_invsqrt(vars);
490                 vars->drop_next = cobalt_control(vars->drop_next,
491                                                  p->interval,
492                                                  vars->rec_inv_sqrt);
493         }
494 
495         return down;
496 }
497 
498 /* Call this with a freshly dequeued packet for possible congestion marking.
499  * Returns true as an instruction to drop the packet, false for delivery.
500  */
501 static bool cobalt_should_drop(struct cobalt_vars *vars,
502                                struct cobalt_params *p,
503                                ktime_t now,
504                                struct sk_buff *skb,
505                                u32 bulk_flows)
506 {
507         bool next_due, over_target, drop = false;
508         ktime_t schedule;
509         u64 sojourn;
510 
511 /* The 'schedule' variable records, in its sign, whether 'now' is before or
512  * after 'drop_next'.  This allows 'drop_next' to be updated before the next
513  * scheduling decision is actually branched, without destroying that
514  * information.  Similarly, the first 'schedule' value calculated is preserved
515  * in the boolean 'next_due'.
516  *
517  * As for 'drop_next', we take advantage of the fact that 'interval' is both
518  * the delay between first exceeding 'target' and the first signalling event,
519  * *and* the scaling factor for the signalling frequency.  It's therefore very
520  * natural to use a single mechanism for both purposes, and eliminates a
521  * significant amount of reference Codel's spaghetti code.  To help with this,
522  * both the '' and '1' entries in the invsqrt cache are 0xFFFFFFFF, as close
523  * as possible to 1.0 in fixed-point.
524  */
525 
526         sojourn = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
527         schedule = ktime_sub(now, vars->drop_next);
528         over_target = sojourn > p->target &&
529                       sojourn > p->mtu_time * bulk_flows * 2 &&
530                       sojourn > p->mtu_time * 4;
531         next_due = vars->count && ktime_to_ns(schedule) >= 0;
532 
533         vars->ecn_marked = false;
534 
535         if (over_target) {
536                 if (!vars->dropping) {
537                         vars->dropping = true;
538                         vars->drop_next = cobalt_control(now,
539                                                          p->interval,
540                                                          vars->rec_inv_sqrt);
541                 }
542                 if (!vars->count)
543                         vars->count = 1;
544         } else if (vars->dropping) {
545                 vars->dropping = false;
546         }
547 
548         if (next_due && vars->dropping) {
549                 /* Use ECN mark if possible, otherwise drop */
550                 drop = !(vars->ecn_marked = INET_ECN_set_ce(skb));
551 
552                 vars->count++;
553                 if (!vars->count)
554                         vars->count--;
555                 cobalt_invsqrt(vars);
556                 vars->drop_next = cobalt_control(vars->drop_next,
557                                                  p->interval,
558                                                  vars->rec_inv_sqrt);
559                 schedule = ktime_sub(now, vars->drop_next);
560         } else {
561                 while (next_due) {
562                         vars->count--;
563                         cobalt_invsqrt(vars);
564                         vars->drop_next = cobalt_control(vars->drop_next,
565                                                          p->interval,
566                                                          vars->rec_inv_sqrt);
567                         schedule = ktime_sub(now, vars->drop_next);
568                         next_due = vars->count && ktime_to_ns(schedule) >= 0;
569                 }
570         }
571 
572         /* Simple BLUE implementation.  Lack of ECN is deliberate. */
573         if (vars->p_drop)
574                 drop |= (prandom_u32() < vars->p_drop);
575 
576         /* Overload the drop_next field as an activity timeout */
577         if (!vars->count)
578                 vars->drop_next = ktime_add_ns(now, p->interval);
579         else if (ktime_to_ns(schedule) > 0 && !drop)
580                 vars->drop_next = now;
581 
582         return drop;
583 }
584 
585 static void cake_update_flowkeys(struct flow_keys *keys,
586                                  const struct sk_buff *skb)
587 {
588 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
589         struct nf_conntrack_tuple tuple = {};
590         bool rev = !skb->_nfct;
591 
592         if (tc_skb_protocol(skb) != htons(ETH_P_IP))
593                 return;
594 
595         if (!nf_ct_get_tuple_skb(&tuple, skb))
596                 return;
597 
598         keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
599         keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
600 
601         if (keys->ports.ports) {
602                 keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
603                 keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
604         }
605 #endif
606 }
607 
608 /* Cake has several subtle multiple bit settings. In these cases you
609  *  would be matching triple isolate mode as well.
610  */
611 
612 static bool cake_dsrc(int flow_mode)
613 {
614         return (flow_mode & CAKE_FLOW_DUAL_SRC) == CAKE_FLOW_DUAL_SRC;
615 }
616 
617 static bool cake_ddst(int flow_mode)
618 {
619         return (flow_mode & CAKE_FLOW_DUAL_DST) == CAKE_FLOW_DUAL_DST;
620 }
621 
622 static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
623                      int flow_mode, u16 flow_override, u16 host_override)
624 {
625         u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
626         u16 reduced_hash, srchost_idx, dsthost_idx;
627         struct flow_keys keys, host_keys;
628 
629         if (unlikely(flow_mode == CAKE_FLOW_NONE))
630                 return 0;
631 
632         /* If both overrides are set we can skip packet dissection entirely */
633         if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
634             (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
635                 goto skip_hash;
636 
637         skb_flow_dissect_flow_keys(skb, &keys,
638                                    FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
639 
640         if (flow_mode & CAKE_FLOW_NAT_FLAG)
641                 cake_update_flowkeys(&keys, skb);
642 
643         /* flow_hash_from_keys() sorts the addresses by value, so we have
644          * to preserve their order in a separate data structure to treat
645          * src and dst host addresses as independently selectable.
646          */
647         host_keys = keys;
648         host_keys.ports.ports     = 0;
649         host_keys.basic.ip_proto  = 0;
650         host_keys.keyid.keyid     = 0;
651         host_keys.tags.flow_label = 0;
652 
653         switch (host_keys.control.addr_type) {
654         case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
655                 host_keys.addrs.v4addrs.src = 0;
656                 dsthost_hash = flow_hash_from_keys(&host_keys);
657                 host_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src;
658                 host_keys.addrs.v4addrs.dst = 0;
659                 srchost_hash = flow_hash_from_keys(&host_keys);
660                 break;
661 
662         case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
663                 memset(&host_keys.addrs.v6addrs.src, 0,
664                        sizeof(host_keys.addrs.v6addrs.src));
665                 dsthost_hash = flow_hash_from_keys(&host_keys);
666                 host_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src;
667                 memset(&host_keys.addrs.v6addrs.dst, 0,
668                        sizeof(host_keys.addrs.v6addrs.dst));
669                 srchost_hash = flow_hash_from_keys(&host_keys);
670                 break;
671 
672         default:
673                 dsthost_hash = 0;
674                 srchost_hash = 0;
675         }
676 
677         /* This *must* be after the above switch, since as a
678          * side-effect it sorts the src and dst addresses.
679          */
680         if (flow_mode & CAKE_FLOW_FLOWS)
681                 flow_hash = flow_hash_from_keys(&keys);
682 
683 skip_hash:
684         if (flow_override)
685                 flow_hash = flow_override - 1;
686         if (host_override) {
687                 dsthost_hash = host_override - 1;
688                 srchost_hash = host_override - 1;
689         }
690 
691         if (!(flow_mode & CAKE_FLOW_FLOWS)) {
692                 if (flow_mode & CAKE_FLOW_SRC_IP)
693                         flow_hash ^= srchost_hash;
694 
695                 if (flow_mode & CAKE_FLOW_DST_IP)
696                         flow_hash ^= dsthost_hash;
697         }
698 
699         reduced_hash = flow_hash % CAKE_QUEUES;
700 
701         /* set-associative hashing */
702         /* fast path if no hash collision (direct lookup succeeds) */
703         if (likely(q->tags[reduced_hash] == flow_hash &&
704                    q->flows[reduced_hash].set)) {
705                 q->way_directs++;
706         } else {
707                 u32 inner_hash = reduced_hash % CAKE_SET_WAYS;
708                 u32 outer_hash = reduced_hash - inner_hash;
709                 bool allocate_src = false;
710                 bool allocate_dst = false;
711                 u32 i, k;
712 
713                 /* check if any active queue in the set is reserved for
714                  * this flow.
715                  */
716                 for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
717                      i++, k = (k + 1) % CAKE_SET_WAYS) {
718                         if (q->tags[outer_hash + k] == flow_hash) {
719                                 if (i)
720                                         q->way_hits++;
721 
722                                 if (!q->flows[outer_hash + k].set) {
723                                         /* need to increment host refcnts */
724                                         allocate_src = cake_dsrc(flow_mode);
725                                         allocate_dst = cake_ddst(flow_mode);
726                                 }
727 
728                                 goto found;
729                         }
730                 }
731 
732                 /* no queue is reserved for this flow, look for an
733                  * empty one.
734                  */
735                 for (i = 0; i < CAKE_SET_WAYS;
736                          i++, k = (k + 1) % CAKE_SET_WAYS) {
737                         if (!q->flows[outer_hash + k].set) {
738                                 q->way_misses++;
739                                 allocate_src = cake_dsrc(flow_mode);
740                                 allocate_dst = cake_ddst(flow_mode);
741                                 goto found;
742                         }
743                 }
744 
745                 /* With no empty queues, default to the original
746                  * queue, accept the collision, update the host tags.
747                  */
748                 q->way_collisions++;
749                 q->hosts[q->flows[reduced_hash].srchost].srchost_refcnt--;
750                 q->hosts[q->flows[reduced_hash].dsthost].dsthost_refcnt--;
751                 allocate_src = cake_dsrc(flow_mode);
752                 allocate_dst = cake_ddst(flow_mode);
753 found:
754                 /* reserve queue for future packets in same flow */
755                 reduced_hash = outer_hash + k;
756                 q->tags[reduced_hash] = flow_hash;
757 
758                 if (allocate_src) {
759                         srchost_idx = srchost_hash % CAKE_QUEUES;
760                         inner_hash = srchost_idx % CAKE_SET_WAYS;
761                         outer_hash = srchost_idx - inner_hash;
762                         for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
763                                 i++, k = (k + 1) % CAKE_SET_WAYS) {
764                                 if (q->hosts[outer_hash + k].srchost_tag ==
765                                     srchost_hash)
766                                         goto found_src;
767                         }
768                         for (i = 0; i < CAKE_SET_WAYS;
769                                 i++, k = (k + 1) % CAKE_SET_WAYS) {
770                                 if (!q->hosts[outer_hash + k].srchost_refcnt)
771                                         break;
772                         }
773                         q->hosts[outer_hash + k].srchost_tag = srchost_hash;
774 found_src:
775                         srchost_idx = outer_hash + k;
776                         q->hosts[srchost_idx].srchost_refcnt++;
777                         q->flows[reduced_hash].srchost = srchost_idx;
778                 }
779 
780                 if (allocate_dst) {
781                         dsthost_idx = dsthost_hash % CAKE_QUEUES;
782                         inner_hash = dsthost_idx % CAKE_SET_WAYS;
783                         outer_hash = dsthost_idx - inner_hash;
784                         for (i = 0, k = inner_hash; i < CAKE_SET_WAYS;
785                              i++, k = (k + 1) % CAKE_SET_WAYS) {
786                                 if (q->hosts[outer_hash + k].dsthost_tag ==
787                                     dsthost_hash)
788                                         goto found_dst;
789                         }
790                         for (i = 0; i < CAKE_SET_WAYS;
791                              i++, k = (k + 1) % CAKE_SET_WAYS) {
792                                 if (!q->hosts[outer_hash + k].dsthost_refcnt)
793                                         break;
794                         }
795                         q->hosts[outer_hash + k].dsthost_tag = dsthost_hash;
796 found_dst:
797                         dsthost_idx = outer_hash + k;
798                         q->hosts[dsthost_idx].dsthost_refcnt++;
799                         q->flows[reduced_hash].dsthost = dsthost_idx;
800                 }
801         }
802 
803         return reduced_hash;
804 }
805 
806 /* helper functions : might be changed when/if skb use a standard list_head */
807 /* remove one skb from head of slot queue */
808 
809 static struct sk_buff *dequeue_head(struct cake_flow *flow)
810 {
811         struct sk_buff *skb = flow->head;
812 
813         if (skb) {
814                 flow->head = skb->next;
815                 skb_mark_not_on_list(skb);
816         }
817 
818         return skb;
819 }
820 
821 /* add skb to flow queue (tail add) */
822 
823 static void flow_queue_add(struct cake_flow *flow, struct sk_buff *skb)
824 {
825         if (!flow->head)
826                 flow->head = skb;
827         else
828                 flow->tail->next = skb;
829         flow->tail = skb;
830         skb->next = NULL;
831 }
832 
833 static struct iphdr *cake_get_iphdr(const struct sk_buff *skb,
834                                     struct ipv6hdr *buf)
835 {
836         unsigned int offset = skb_network_offset(skb);
837         struct iphdr *iph;
838 
839         iph = skb_header_pointer(skb, offset, sizeof(struct iphdr), buf);
840 
841         if (!iph)
842                 return NULL;
843 
844         if (iph->version == 4 && iph->protocol == IPPROTO_IPV6)
845                 return skb_header_pointer(skb, offset + iph->ihl * 4,
846                                           sizeof(struct ipv6hdr), buf);
847 
848         else if (iph->version == 4)
849                 return iph;
850 
851         else if (iph->version == 6)
852                 return skb_header_pointer(skb, offset, sizeof(struct ipv6hdr),
853                                           buf);
854 
855         return NULL;
856 }
857 
858 static struct tcphdr *cake_get_tcphdr(const struct sk_buff *skb,
859                                       void *buf, unsigned int bufsize)
860 {
861         unsigned int offset = skb_network_offset(skb);
862         const struct ipv6hdr *ipv6h;
863         const struct tcphdr *tcph;
864         const struct iphdr *iph;
865         struct ipv6hdr _ipv6h;
866         struct tcphdr _tcph;
867 
868         ipv6h = skb_header_pointer(skb, offset, sizeof(_ipv6h), &_ipv6h);
869 
870         if (!ipv6h)
871                 return NULL;
872 
873         if (ipv6h->version == 4) {
874                 iph = (struct iphdr *)ipv6h;
875                 offset += iph->ihl * 4;
876 
877                 /* special-case 6in4 tunnelling, as that is a common way to get
878                  * v6 connectivity in the home
879                  */
880                 if (iph->protocol == IPPROTO_IPV6) {
881                         ipv6h = skb_header_pointer(skb, offset,
882                                                    sizeof(_ipv6h), &_ipv6h);
883 
884                         if (!ipv6h || ipv6h->nexthdr != IPPROTO_TCP)
885                                 return NULL;
886 
887                         offset += sizeof(struct ipv6hdr);
888 
889                 } else if (iph->protocol != IPPROTO_TCP) {
890                         return NULL;
891                 }
892 
893         } else if (ipv6h->version == 6) {
894                 if (ipv6h->nexthdr != IPPROTO_TCP)
895                         return NULL;
896 
897                 offset += sizeof(struct ipv6hdr);
898         } else {
899                 return NULL;
900         }
901 
902         tcph = skb_header_pointer(skb, offset, sizeof(_tcph), &_tcph);
903         if (!tcph)
904                 return NULL;
905 
906         return skb_header_pointer(skb, offset,
907                                   min(__tcp_hdrlen(tcph), bufsize), buf);
908 }
909 
910 static const void *cake_get_tcpopt(const struct tcphdr *tcph,
911                                    int code, int *oplen)
912 {
913         /* inspired by tcp_parse_options in tcp_input.c */
914         int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
915         const u8 *ptr = (const u8 *)(tcph + 1);
916 
917         while (length > 0) {
918                 int opcode = *ptr++;
919                 int opsize;
920 
921                 if (opcode == TCPOPT_EOL)
922                         break;
923                 if (opcode == TCPOPT_NOP) {
924                         length--;
925                         continue;
926                 }
927                 opsize = *ptr++;
928                 if (opsize < 2 || opsize > length)
929                         break;
930 
931                 if (opcode == code) {
932                         *oplen = opsize;
933                         return ptr;
934                 }
935 
936                 ptr += opsize - 2;
937                 length -= opsize;
938         }
939 
940         return NULL;
941 }
942 
943 /* Compare two SACK sequences. A sequence is considered greater if it SACKs more
944  * bytes than the other. In the case where both sequences ACKs bytes that the
945  * other doesn't, A is considered greater. DSACKs in A also makes A be
946  * considered greater.
947  *
948  * @return -1, 0 or 1 as normal compare functions
949  */
950 static int cake_tcph_sack_compare(const struct tcphdr *tcph_a,
951                                   const struct tcphdr *tcph_b)
952 {
953         const struct tcp_sack_block_wire *sack_a, *sack_b;
954         u32 ack_seq_a = ntohl(tcph_a->ack_seq);
955         u32 bytes_a = 0, bytes_b = 0;
956         int oplen_a, oplen_b;
957         bool first = true;
958 
959         sack_a = cake_get_tcpopt(tcph_a, TCPOPT_SACK, &oplen_a);
960         sack_b = cake_get_tcpopt(tcph_b, TCPOPT_SACK, &oplen_b);
961 
962         /* pointers point to option contents */
963         oplen_a -= TCPOLEN_SACK_BASE;
964         oplen_b -= TCPOLEN_SACK_BASE;
965 
966         if (sack_a && oplen_a >= sizeof(*sack_a) &&
967             (!sack_b || oplen_b < sizeof(*sack_b)))
968                 return -1;
969         else if (sack_b && oplen_b >= sizeof(*sack_b) &&
970                  (!sack_a || oplen_a < sizeof(*sack_a)))
971                 return 1;
972         else if ((!sack_a || oplen_a < sizeof(*sack_a)) &&
973                  (!sack_b || oplen_b < sizeof(*sack_b)))
974                 return 0;
975 
976         while (oplen_a >= sizeof(*sack_a)) {
977                 const struct tcp_sack_block_wire *sack_tmp = sack_b;
978                 u32 start_a = get_unaligned_be32(&sack_a->start_seq);
979                 u32 end_a = get_unaligned_be32(&sack_a->end_seq);
980                 int oplen_tmp = oplen_b;
981                 bool found = false;
982 
983                 /* DSACK; always considered greater to prevent dropping */
984                 if (before(start_a, ack_seq_a))
985                         return -1;
986 
987                 bytes_a += end_a - start_a;
988 
989                 while (oplen_tmp >= sizeof(*sack_tmp)) {
990                         u32 start_b = get_unaligned_be32(&sack_tmp->start_seq);
991                         u32 end_b = get_unaligned_be32(&sack_tmp->end_seq);
992 
993                         /* first time through we count the total size */
994                         if (first)
995                                 bytes_b += end_b - start_b;
996 
997                         if (!after(start_b, start_a) && !before(end_b, end_a)) {
998                                 found = true;
999                                 if (!first)
1000                                         break;
1001                         }
1002                         oplen_tmp -= sizeof(*sack_tmp);
1003                         sack_tmp++;
1004                 }
1005 
1006                 if (!found)
1007                         return -1;
1008 
1009                 oplen_a -= sizeof(*sack_a);
1010                 sack_a++;
1011                 first = false;
1012         }
1013 
1014         /* If we made it this far, all ranges SACKed by A are covered by B, so
1015          * either the SACKs are equal, or B SACKs more bytes.
1016          */
1017         return bytes_b > bytes_a ? 1 : 0;
1018 }
1019 
1020 static void cake_tcph_get_tstamp(const struct tcphdr *tcph,
1021                                  u32 *tsval, u32 *tsecr)
1022 {
1023         const u8 *ptr;
1024         int opsize;
1025 
1026         ptr = cake_get_tcpopt(tcph, TCPOPT_TIMESTAMP, &opsize);
1027 
1028         if (ptr && opsize == TCPOLEN_TIMESTAMP) {
1029                 *tsval = get_unaligned_be32(ptr);
1030                 *tsecr = get_unaligned_be32(ptr + 4);
1031         }
1032 }
1033 
1034 static bool cake_tcph_may_drop(const struct tcphdr *tcph,
1035                                u32 tstamp_new, u32 tsecr_new)
1036 {
1037         /* inspired by tcp_parse_options in tcp_input.c */
1038         int length = __tcp_hdrlen(tcph) - sizeof(struct tcphdr);
1039         const u8 *ptr = (const u8 *)(tcph + 1);
1040         u32 tstamp, tsecr;
1041 
1042         /* 3 reserved flags must be unset to avoid future breakage
1043          * ACK must be set
1044          * ECE/CWR are handled separately
1045          * All other flags URG/PSH/RST/SYN/FIN must be unset
1046          * 0x0FFF0000 = all TCP flags (confirm ACK=1, others zero)
1047          * 0x00C00000 = CWR/ECE (handled separately)
1048          * 0x0F3F0000 = 0x0FFF0000 & ~0x00C00000
1049          */
1050         if (((tcp_flag_word(tcph) &
1051               cpu_to_be32(0x0F3F0000)) != TCP_FLAG_ACK))
1052                 return false;
1053 
1054         while (length > 0) {
1055                 int opcode = *ptr++;
1056                 int opsize;
1057 
1058                 if (opcode == TCPOPT_EOL)
1059                         break;
1060                 if (opcode == TCPOPT_NOP) {
1061                         length--;
1062                         continue;
1063                 }
1064                 opsize = *ptr++;
1065                 if (opsize < 2 || opsize > length)
1066                         break;
1067 
1068                 switch (opcode) {
1069                 case TCPOPT_MD5SIG: /* doesn't influence state */
1070                         break;
1071 
1072                 case TCPOPT_SACK: /* stricter checking performed later */
1073                         if (opsize % 8 != 2)
1074                                 return false;
1075                         break;
1076 
1077                 case TCPOPT_TIMESTAMP:
1078                         /* only drop timestamps lower than new */
1079                         if (opsize != TCPOLEN_TIMESTAMP)
1080                                 return false;
1081                         tstamp = get_unaligned_be32(ptr);
1082                         tsecr = get_unaligned_be32(ptr + 4);
1083                         if (after(tstamp, tstamp_new) ||
1084                             after(tsecr, tsecr_new))
1085                                 return false;
1086                         break;
1087 
1088                 case TCPOPT_MSS:  /* these should only be set on SYN */
1089                 case TCPOPT_WINDOW:
1090                 case TCPOPT_SACK_PERM:
1091                 case TCPOPT_FASTOPEN:
1092                 case TCPOPT_EXP:
1093                 default: /* don't drop if any unknown options are present */
1094                         return false;
1095                 }
1096 
1097                 ptr += opsize - 2;
1098                 length -= opsize;
1099         }
1100 
1101         return true;
1102 }
1103 
1104 static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
1105                                        struct cake_flow *flow)
1106 {
1107         bool aggressive = q->ack_filter == CAKE_ACK_AGGRESSIVE;
1108         struct sk_buff *elig_ack = NULL, *elig_ack_prev = NULL;
1109         struct sk_buff *skb_check, *skb_prev = NULL;
1110         const struct ipv6hdr *ipv6h, *ipv6h_check;
1111         unsigned char _tcph[64], _tcph_check[64];
1112         const struct tcphdr *tcph, *tcph_check;
1113         const struct iphdr *iph, *iph_check;
1114         struct ipv6hdr _iph, _iph_check;
1115         const struct sk_buff *skb;
1116         int seglen, num_found = 0;
1117         u32 tstamp = 0, tsecr = 0;
1118         __be32 elig_flags = 0;
1119         int sack_comp;
1120 
1121         /* no other possible ACKs to filter */
1122         if (flow->head == flow->tail)
1123                 return NULL;
1124 
1125         skb = flow->tail;
1126         tcph = cake_get_tcphdr(skb, _tcph, sizeof(_tcph));
1127         iph = cake_get_iphdr(skb, &_iph);
1128         if (!tcph)
1129                 return NULL;
1130 
1131         cake_tcph_get_tstamp(tcph, &tstamp, &tsecr);
1132 
1133         /* the 'triggering' packet need only have the ACK flag set.
1134          * also check that SYN is not set, as there won't be any previous ACKs.
1135          */
1136         if ((tcp_flag_word(tcph) &
1137              (TCP_FLAG_ACK | TCP_FLAG_SYN)) != TCP_FLAG_ACK)
1138                 return NULL;
1139 
1140         /* the 'triggering' ACK is at the tail of the queue, we have already
1141          * returned if it is the only packet in the flow. loop through the rest
1142          * of the queue looking for pure ACKs with the same 5-tuple as the
1143          * triggering one.
1144          */
1145         for (skb_check = flow->head;
1146              skb_check && skb_check != skb;
1147              skb_prev = skb_check, skb_check = skb_check->next) {
1148                 iph_check = cake_get_iphdr(skb_check, &_iph_check);
1149                 tcph_check = cake_get_tcphdr(skb_check, &_tcph_check,
1150                                              sizeof(_tcph_check));
1151 
1152                 /* only TCP packets with matching 5-tuple are eligible, and only
1153                  * drop safe headers
1154                  */
1155                 if (!tcph_check || iph->version != iph_check->version ||
1156                     tcph_check->source != tcph->source ||
1157                     tcph_check->dest != tcph->dest)
1158                         continue;
1159 
1160                 if (iph_check->version == 4) {
1161                         if (iph_check->saddr != iph->saddr ||
1162                             iph_check->daddr != iph->daddr)
1163                                 continue;
1164 
1165                         seglen = ntohs(iph_check->tot_len) -
1166                                        (4 * iph_check->ihl);
1167                 } else if (iph_check->version == 6) {
1168                         ipv6h = (struct ipv6hdr *)iph;
1169                         ipv6h_check = (struct ipv6hdr *)iph_check;
1170 
1171                         if (ipv6_addr_cmp(&ipv6h_check->saddr, &ipv6h->saddr) ||
1172                             ipv6_addr_cmp(&ipv6h_check->daddr, &ipv6h->daddr))
1173                                 continue;
1174 
1175                         seglen = ntohs(ipv6h_check->payload_len);
1176                 } else {
1177                         WARN_ON(1);  /* shouldn't happen */
1178                         continue;
1179                 }
1180 
1181                 /* If the ECE/CWR flags changed from the previous eligible
1182                  * packet in the same flow, we should no longer be dropping that
1183                  * previous packet as this would lose information.
1184                  */
1185                 if (elig_ack && (tcp_flag_word(tcph_check) &
1186                                  (TCP_FLAG_ECE | TCP_FLAG_CWR)) != elig_flags) {
1187                         elig_ack = NULL;
1188                         elig_ack_prev = NULL;
1189                         num_found--;
1190                 }
1191 
1192                 /* Check TCP options and flags, don't drop ACKs with segment
1193                  * data, and don't drop ACKs with a higher cumulative ACK
1194                  * counter than the triggering packet. Check ACK seqno here to
1195                  * avoid parsing SACK options of packets we are going to exclude
1196                  * anyway.
1197                  */
1198                 if (!cake_tcph_may_drop(tcph_check, tstamp, tsecr) ||
1199                     (seglen - __tcp_hdrlen(tcph_check)) != 0 ||
1200                     after(ntohl(tcph_check->ack_seq), ntohl(tcph->ack_seq)))
1201                         continue;
1202 
1203                 /* Check SACK options. The triggering packet must SACK more data
1204                  * than the ACK under consideration, or SACK the same range but
1205                  * have a larger cumulative ACK counter. The latter is a
1206                  * pathological case, but is contained in the following check
1207                  * anyway, just to be safe.
1208                  */
1209                 sack_comp = cake_tcph_sack_compare(tcph_check, tcph);
1210 
1211                 if (sack_comp < 0 ||
1212                     (ntohl(tcph_check->ack_seq) == ntohl(tcph->ack_seq) &&
1213                      sack_comp == 0))
1214                         continue;
1215 
1216                 /* At this point we have found an eligible pure ACK to drop; if
1217                  * we are in aggressive mode, we are done. Otherwise, keep
1218                  * searching unless this is the second eligible ACK we
1219                  * found.
1220                  *
1221                  * Since we want to drop ACK closest to the head of the queue,
1222                  * save the first eligible ACK we find, even if we need to loop
1223                  * again.
1224                  */
1225                 if (!elig_ack) {
1226                         elig_ack = skb_check;
1227                         elig_ack_prev = skb_prev;
1228                         elig_flags = (tcp_flag_word(tcph_check)
1229                                       & (TCP_FLAG_ECE | TCP_FLAG_CWR));
1230                 }
1231 
1232                 if (num_found++ > 0)
1233                         goto found;
1234         }
1235 
1236         /* We made it through the queue without finding two eligible ACKs . If
1237          * we found a single eligible ACK we can drop it in aggressive mode if
1238          * we can guarantee that this does not interfere with ECN flag
1239          * information. We ensure this by dropping it only if the enqueued
1240          * packet is consecutive with the eligible ACK, and their flags match.
1241          */
1242         if (elig_ack && aggressive && elig_ack->next == skb &&
1243             (elig_flags == (tcp_flag_word(tcph) &
1244                             (TCP_FLAG_ECE | TCP_FLAG_CWR))))
1245                 goto found;
1246 
1247         return NULL;
1248 
1249 found:
1250         if (elig_ack_prev)
1251                 elig_ack_prev->next = elig_ack->next;
1252         else
1253                 flow->head = elig_ack->next;
1254 
1255         skb_mark_not_on_list(elig_ack);
1256 
1257         return elig_ack;
1258 }
1259 
1260 static u64 cake_ewma(u64 avg, u64 sample, u32 shift)
1261 {
1262         avg -= avg >> shift;
1263         avg += sample >> shift;
1264         return avg;
1265 }
1266 
1267 static u32 cake_calc_overhead(struct cake_sched_data *q, u32 len, u32 off)
1268 {
1269         if (q->rate_flags & CAKE_FLAG_OVERHEAD)
1270                 len -= off;
1271 
1272         if (q->max_netlen < len)
1273                 q->max_netlen = len;
1274         if (q->min_netlen > len)
1275                 q->min_netlen = len;
1276 
1277         len += q->rate_overhead;
1278 
1279         if (len < q->rate_mpu)
1280                 len = q->rate_mpu;
1281 
1282         if (q->atm_mode == CAKE_ATM_ATM) {
1283                 len += 47;
1284                 len /= 48;
1285                 len *= 53;
1286         } else if (q->atm_mode == CAKE_ATM_PTM) {
1287                 /* Add one byte per 64 bytes or part thereof.
1288                  * This is conservative and easier to calculate than the
1289                  * precise value.
1290                  */
1291                 len += (len + 63) / 64;
1292         }
1293 
1294         if (q->max_adjlen < len)
1295                 q->max_adjlen = len;
1296         if (q->min_adjlen > len)
1297                 q->min_adjlen = len;
1298 
1299         return len;
1300 }
1301 
1302 static u32 cake_overhead(struct cake_sched_data *q, const struct sk_buff *skb)
1303 {
1304         const struct skb_shared_info *shinfo = skb_shinfo(skb);
1305         unsigned int hdr_len, last_len = 0;
1306         u32 off = skb_network_offset(skb);
1307         u32 len = qdisc_pkt_len(skb);
1308         u16 segs = 1;
1309 
1310         q->avg_netoff = cake_ewma(q->avg_netoff, off << 16, 8);
1311 
1312         if (!shinfo->gso_size)
1313                 return cake_calc_overhead(q, len, off);
1314 
1315         /* borrowed from qdisc_pkt_len_init() */
1316         hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
1317 
1318         /* + transport layer */
1319         if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 |
1320                                                 SKB_GSO_TCPV6))) {
1321                 const struct tcphdr *th;
1322                 struct tcphdr _tcphdr;
1323 
1324                 th = skb_header_pointer(skb, skb_transport_offset(skb),
1325                                         sizeof(_tcphdr), &_tcphdr);
1326                 if (likely(th))
1327                         hdr_len += __tcp_hdrlen(th);
1328         } else {
1329                 struct udphdr _udphdr;
1330 
1331                 if (skb_header_pointer(skb, skb_transport_offset(skb),
1332                                        sizeof(_udphdr), &_udphdr))
1333                         hdr_len += sizeof(struct udphdr);
1334         }
1335 
1336         if (unlikely(shinfo->gso_type & SKB_GSO_DODGY))
1337                 segs = DIV_ROUND_UP(skb->len - hdr_len,
1338                                     shinfo->gso_size);
1339         else
1340                 segs = shinfo->gso_segs;
1341 
1342         len = shinfo->gso_size + hdr_len;
1343         last_len = skb->len - shinfo->gso_size * (segs - 1);
1344 
1345         return (cake_calc_overhead(q, len, off) * (segs - 1) +
1346                 cake_calc_overhead(q, last_len, off));
1347 }
1348 
1349 static void cake_heap_swap(struct cake_sched_data *q, u16 i, u16 j)
1350 {
1351         struct cake_heap_entry ii = q->overflow_heap[i];
1352         struct cake_heap_entry jj = q->overflow_heap[j];
1353 
1354         q->overflow_heap[i] = jj;
1355         q->overflow_heap[j] = ii;
1356 
1357         q->tins[ii.t].overflow_idx[ii.b] = j;
1358         q->tins[jj.t].overflow_idx[jj.b] = i;
1359 }
1360 
1361 static u32 cake_heap_get_backlog(const struct cake_sched_data *q, u16 i)
1362 {
1363         struct cake_heap_entry ii = q->overflow_heap[i];
1364 
1365         return q->tins[ii.t].backlogs[ii.b];
1366 }
1367 
1368 static void cake_heapify(struct cake_sched_data *q, u16 i)
1369 {
1370         static const u32 a = CAKE_MAX_TINS * CAKE_QUEUES;
1371         u32 mb = cake_heap_get_backlog(q, i);
1372         u32 m = i;
1373 
1374         while (m < a) {
1375                 u32 l = m + m + 1;
1376                 u32 r = l + 1;
1377 
1378                 if (l < a) {
1379                         u32 lb = cake_heap_get_backlog(q, l);
1380 
1381                         if (lb > mb) {
1382                                 m  = l;
1383                                 mb = lb;
1384                         }
1385                 }
1386 
1387                 if (r < a) {
1388                         u32 rb = cake_heap_get_backlog(q, r);
1389 
1390                         if (rb > mb) {
1391                                 m  = r;
1392                                 mb = rb;
1393                         }
1394                 }
1395 
1396                 if (m != i) {
1397                         cake_heap_swap(q, i, m);
1398                         i = m;
1399                 } else {
1400                         break;
1401                 }
1402         }
1403 }
1404 
1405 static void cake_heapify_up(struct cake_sched_data *q, u16 i)
1406 {
1407         while (i > 0 && i < CAKE_MAX_TINS * CAKE_QUEUES) {
1408                 u16 p = (i - 1) >> 1;
1409                 u32 ib = cake_heap_get_backlog(q, i);
1410                 u32 pb = cake_heap_get_backlog(q, p);
1411 
1412                 if (ib > pb) {
1413                         cake_heap_swap(q, i, p);
1414                         i = p;
1415                 } else {
1416                         break;
1417                 }
1418         }
1419 }
1420 
1421 static int cake_advance_shaper(struct cake_sched_data *q,
1422                                struct cake_tin_data *b,
1423                                struct sk_buff *skb,
1424                                ktime_t now, bool drop)
1425 {
1426         u32 len = get_cobalt_cb(skb)->adjusted_len;
1427 
1428         /* charge packet bandwidth to this tin
1429          * and to the global shaper.
1430          */
1431         if (q->rate_ns) {
1432                 u64 tin_dur = (len * b->tin_rate_ns) >> b->tin_rate_shft;
1433                 u64 global_dur = (len * q->rate_ns) >> q->rate_shft;
1434                 u64 failsafe_dur = global_dur + (global_dur >> 1);
1435 
1436                 if (ktime_before(b->time_next_packet, now))
1437                         b->time_next_packet = ktime_add_ns(b->time_next_packet,
1438                                                            tin_dur);
1439 
1440                 else if (ktime_before(b->time_next_packet,
1441                                       ktime_add_ns(now, tin_dur)))
1442                         b->time_next_packet = ktime_add_ns(now, tin_dur);
1443 
1444                 q->time_next_packet = ktime_add_ns(q->time_next_packet,
1445                                                    global_dur);
1446                 if (!drop)
1447                         q->failsafe_next_packet = \
1448                                 ktime_add_ns(q->failsafe_next_packet,
1449                                              failsafe_dur);
1450         }
1451         return len;
1452 }
1453 
1454 static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free)
1455 {
1456         struct cake_sched_data *q = qdisc_priv(sch);
1457         ktime_t now = ktime_get();
1458         u32 idx = 0, tin = 0, len;
1459         struct cake_heap_entry qq;
1460         struct cake_tin_data *b;
1461         struct cake_flow *flow;
1462         struct sk_buff *skb;
1463 
1464         if (!q->overflow_timeout) {
1465                 int i;
1466                 /* Build fresh max-heap */
1467                 for (i = CAKE_MAX_TINS * CAKE_QUEUES / 2; i >= 0; i--)
1468                         cake_heapify(q, i);
1469         }
1470         q->overflow_timeout = 65535;
1471 
1472         /* select longest queue for pruning */
1473         qq  = q->overflow_heap[0];
1474         tin = qq.t;
1475         idx = qq.b;
1476 
1477         b = &q->tins[tin];
1478         flow = &b->flows[idx];
1479         skb = dequeue_head(flow);
1480         if (unlikely(!skb)) {
1481                 /* heap has gone wrong, rebuild it next time */
1482                 q->overflow_timeout = 0;
1483                 return idx + (tin << 16);
1484         }
1485 
1486         if (cobalt_queue_full(&flow->cvars, &b->cparams, now))
1487                 b->unresponsive_flow_count++;
1488 
1489         len = qdisc_pkt_len(skb);
1490         q->buffer_used      -= skb->truesize;
1491         b->backlogs[idx]    -= len;
1492         b->tin_backlog      -= len;
1493         sch->qstats.backlog -= len;
1494         qdisc_tree_reduce_backlog(sch, 1, len);
1495 
1496         flow->dropped++;
1497         b->tin_dropped++;
1498         sch->qstats.drops++;
1499 
1500         if (q->rate_flags & CAKE_FLAG_INGRESS)
1501                 cake_advance_shaper(q, b, skb, now, true);
1502 
1503         __qdisc_drop(skb, to_free);
1504         sch->q.qlen--;
1505 
1506         cake_heapify(q, 0);
1507 
1508         return idx + (tin << 16);
1509 }
1510 
1511 static void cake_wash_diffserv(struct sk_buff *skb)
1512 {
1513         switch (skb->protocol) {
1514         case htons(ETH_P_IP):
1515                 ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
1516                 break;
1517         case htons(ETH_P_IPV6):
1518                 ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
1519                 break;
1520         default:
1521                 break;
1522         }
1523 }
1524 
1525 static u8 cake_handle_diffserv(struct sk_buff *skb, u16 wash)
1526 {
1527         u8 dscp;
1528 
1529         switch (skb->protocol) {
1530         case htons(ETH_P_IP):
1531                 dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
1532                 if (wash && dscp)
1533                         ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, 0);
1534                 return dscp;
1535 
1536         case htons(ETH_P_IPV6):
1537                 dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;
1538                 if (wash && dscp)
1539                         ipv6_change_dsfield(ipv6_hdr(skb), INET_ECN_MASK, 0);
1540                 return dscp;
1541 
1542         case htons(ETH_P_ARP):
1543                 return 0x38;  /* CS7 - Net Control */
1544 
1545         default:
1546                 /* If there is no Diffserv field, treat as best-effort */
1547                 return 0;
1548         }
1549 }
1550 
1551 static struct cake_tin_data *cake_select_tin(struct Qdisc *sch,
1552                                              struct sk_buff *skb)
1553 {
1554         struct cake_sched_data *q = qdisc_priv(sch);
1555         u32 tin;
1556 
1557         if (TC_H_MAJ(skb->priority) == sch->handle &&
1558             TC_H_MIN(skb->priority) > 0 &&
1559             TC_H_MIN(skb->priority) <= q->tin_cnt) {
1560                 tin = q->tin_order[TC_H_MIN(skb->priority) - 1];
1561 
1562                 if (q->rate_flags & CAKE_FLAG_WASH)
1563                         cake_wash_diffserv(skb);
1564         } else if (q->tin_mode != CAKE_DIFFSERV_BESTEFFORT) {
1565                 /* extract the Diffserv Precedence field, if it exists */
1566                 /* and clear DSCP bits if washing */
1567                 tin = q->tin_index[cake_handle_diffserv(skb,
1568                                 q->rate_flags & CAKE_FLAG_WASH)];
1569                 if (unlikely(tin >= q->tin_cnt))
1570                         tin = 0;
1571         } else {
1572                 tin = 0;
1573                 if (q->rate_flags & CAKE_FLAG_WASH)
1574                         cake_wash_diffserv(skb);
1575         }
1576 
1577         return &q->tins[tin];
1578 }
1579 
1580 static u32 cake_classify(struct Qdisc *sch, struct cake_tin_data **t,
1581                          struct sk_buff *skb, int flow_mode, int *qerr)
1582 {
1583         struct cake_sched_data *q = qdisc_priv(sch);
1584         struct tcf_proto *filter;
1585         struct tcf_result res;
1586         u16 flow = 0, host = 0;
1587         int result;
1588 
1589         filter = rcu_dereference_bh(q->filter_list);
1590         if (!filter)
1591                 goto hash;
1592 
1593         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
1594         result = tcf_classify(skb, filter, &res, false);
1595 
1596         if (result >= 0) {
1597 #ifdef CONFIG_NET_CLS_ACT
1598                 switch (result) {
1599                 case TC_ACT_STOLEN:
1600                 case TC_ACT_QUEUED:
1601                 case TC_ACT_TRAP:
1602                         *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
1603                         /* fall through */
1604                 case TC_ACT_SHOT:
1605                         return 0;
1606                 }
1607 #endif
1608                 if (TC_H_MIN(res.classid) <= CAKE_QUEUES)
1609                         flow = TC_H_MIN(res.classid);
1610                 if (TC_H_MAJ(res.classid) <= (CAKE_QUEUES << 16))
1611                         host = TC_H_MAJ(res.classid) >> 16;
1612         }
1613 hash:
1614         *t = cake_select_tin(sch, skb);
1615         return cake_hash(*t, skb, flow_mode, flow, host) + 1;
1616 }
1617 
1618 static void cake_reconfigure(struct Qdisc *sch);
1619 
1620 static s32 cake_enqueue(struct sk_buff *skb, struct Qdisc *sch,
1621                         struct sk_buff **to_free)
1622 {
1623         struct cake_sched_data *q = qdisc_priv(sch);
1624         int len = qdisc_pkt_len(skb);
1625         int uninitialized_var(ret);
1626         struct sk_buff *ack = NULL;
1627         ktime_t now = ktime_get();
1628         struct cake_tin_data *b;
1629         struct cake_flow *flow;
1630         u32 idx;
1631 
1632         /* choose flow to insert into */
1633         idx = cake_classify(sch, &b, skb, q->flow_mode, &ret);
1634         if (idx == 0) {
1635                 if (ret & __NET_XMIT_BYPASS)
1636                         qdisc_qstats_drop(sch);
1637                 __qdisc_drop(skb, to_free);
1638                 return ret;
1639         }
1640         idx--;
1641         flow = &b->flows[idx];
1642 
1643         /* ensure shaper state isn't stale */
1644         if (!b->tin_backlog) {
1645                 if (ktime_before(b->time_next_packet, now))
1646                         b->time_next_packet = now;
1647 
1648                 if (!sch->q.qlen) {
1649                         if (ktime_before(q->time_next_packet, now)) {
1650                                 q->failsafe_next_packet = now;
1651                                 q->time_next_packet = now;
1652                         } else if (ktime_after(q->time_next_packet, now) &&
1653                                    ktime_after(q->failsafe_next_packet, now)) {
1654                                 u64 next = \
1655                                         min(ktime_to_ns(q->time_next_packet),
1656                                             ktime_to_ns(
1657                                                    q->failsafe_next_packet));
1658                                 sch->qstats.overlimits++;
1659                                 qdisc_watchdog_schedule_ns(&q->watchdog, next);
1660                         }
1661                 }
1662         }
1663 
1664         if (unlikely(len > b->max_skblen))
1665                 b->max_skblen = len;
1666 
1667         if (skb_is_gso(skb) && q->rate_flags & CAKE_FLAG_SPLIT_GSO) {
1668                 struct sk_buff *segs, *nskb;
1669                 netdev_features_t features = netif_skb_features(skb);
1670                 unsigned int slen = 0;
1671 
1672                 segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
1673                 if (IS_ERR_OR_NULL(segs))
1674                         return qdisc_drop(skb, sch, to_free);
1675 
1676                 while (segs) {
1677                         nskb = segs->next;
1678                         skb_mark_not_on_list(segs);
1679                         qdisc_skb_cb(segs)->pkt_len = segs->len;
1680                         cobalt_set_enqueue_time(segs, now);
1681                         get_cobalt_cb(segs)->adjusted_len = cake_overhead(q,
1682                                                                           segs);
1683                         flow_queue_add(flow, segs);
1684 
1685                         sch->q.qlen++;
1686                         slen += segs->len;
1687                         q->buffer_used += segs->truesize;
1688                         b->packets++;
1689                         segs = nskb;
1690                 }
1691 
1692                 /* stats */
1693                 b->bytes            += slen;
1694                 b->backlogs[idx]    += slen;
1695                 b->tin_backlog      += slen;
1696                 sch->qstats.backlog += slen;
1697                 q->avg_window_bytes += slen;
1698 
1699                 qdisc_tree_reduce_backlog(sch, 1, len);
1700                 consume_skb(skb);
1701         } else {
1702                 /* not splitting */
1703                 cobalt_set_enqueue_time(skb, now);
1704                 get_cobalt_cb(skb)->adjusted_len = cake_overhead(q, skb);
1705                 flow_queue_add(flow, skb);
1706 
1707                 if (q->ack_filter)
1708                         ack = cake_ack_filter(q, flow);
1709 
1710                 if (ack) {
1711                         b->ack_drops++;
1712                         sch->qstats.drops++;
1713                         b->bytes += qdisc_pkt_len(ack);
1714                         len -= qdisc_pkt_len(ack);
1715                         q->buffer_used += skb->truesize - ack->truesize;
1716                         if (q->rate_flags & CAKE_FLAG_INGRESS)
1717                                 cake_advance_shaper(q, b, ack, now, true);
1718 
1719                         qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(ack));
1720                         consume_skb(ack);
1721                 } else {
1722                         sch->q.qlen++;
1723                         q->buffer_used      += skb->truesize;
1724                 }
1725 
1726                 /* stats */
1727                 b->packets++;
1728                 b->bytes            += len;
1729                 b->backlogs[idx]    += len;
1730                 b->tin_backlog      += len;
1731                 sch->qstats.backlog += len;
1732                 q->avg_window_bytes += len;
1733         }
1734 
1735         if (q->overflow_timeout)
1736                 cake_heapify_up(q, b->overflow_idx[idx]);
1737 
1738         /* incoming bandwidth capacity estimate */
1739         if (q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS) {
1740                 u64 packet_interval = \
1741                         ktime_to_ns(ktime_sub(now, q->last_packet_time));
1742 
1743                 if (packet_interval > NSEC_PER_SEC)
1744                         packet_interval = NSEC_PER_SEC;
1745 
1746                 /* filter out short-term bursts, eg. wifi aggregation */
1747                 q->avg_packet_interval = \
1748                         cake_ewma(q->avg_packet_interval,
1749                                   packet_interval,
1750                                   (packet_interval > q->avg_packet_interval ?
1751                                           2 : 8));
1752 
1753                 q->last_packet_time = now;
1754 
1755                 if (packet_interval > q->avg_packet_interval) {
1756                         u64 window_interval = \
1757                                 ktime_to_ns(ktime_sub(now,
1758                                                       q->avg_window_begin));
1759                         u64 b = q->avg_window_bytes * (u64)NSEC_PER_SEC;
1760 
1761                         do_div(b, window_interval);
1762                         q->avg_peak_bandwidth =
1763                                 cake_ewma(q->avg_peak_bandwidth, b,
1764                                           b > q->avg_peak_bandwidth ? 2 : 8);
1765                         q->avg_window_bytes = 0;
1766                         q->avg_window_begin = now;
1767 
1768                         if (ktime_after(now,
1769                                         ktime_add_ms(q->last_reconfig_time,
1770                                                      250))) {
1771                                 q->rate_bps = (q->avg_peak_bandwidth * 15) >> 4;
1772                                 cake_reconfigure(sch);
1773                         }
1774                 }
1775         } else {
1776                 q->avg_window_bytes = 0;
1777                 q->last_packet_time = now;
1778         }
1779 
1780         /* flowchain */
1781         if (!flow->set || flow->set == CAKE_SET_DECAYING) {
1782                 struct cake_host *srchost = &b->hosts[flow->srchost];
1783                 struct cake_host *dsthost = &b->hosts[flow->dsthost];
1784                 u16 host_load = 1;
1785 
1786                 if (!flow->set) {
1787                         list_add_tail(&flow->flowchain, &b->new_flows);
1788                 } else {
1789                         b->decaying_flow_count--;
1790                         list_move_tail(&flow->flowchain, &b->new_flows);
1791                 }
1792                 flow->set = CAKE_SET_SPARSE;
1793                 b->sparse_flow_count++;
1794 
1795                 if (cake_dsrc(q->flow_mode))
1796                         host_load = max(host_load, srchost->srchost_refcnt);
1797 
1798                 if (cake_ddst(q->flow_mode))
1799                         host_load = max(host_load, dsthost->dsthost_refcnt);
1800 
1801                 flow->deficit = (b->flow_quantum *
1802                                  quantum_div[host_load]) >> 16;
1803         } else if (flow->set == CAKE_SET_SPARSE_WAIT) {
1804                 /* this flow was empty, accounted as a sparse flow, but actually
1805                  * in the bulk rotation.
1806                  */
1807                 flow->set = CAKE_SET_BULK;
1808                 b->sparse_flow_count--;
1809                 b->bulk_flow_count++;
1810         }
1811 
1812         if (q->buffer_used > q->buffer_max_used)
1813                 q->buffer_max_used = q->buffer_used;
1814 
1815         if (q->buffer_used > q->buffer_limit) {
1816                 u32 dropped = 0;
1817 
1818                 while (q->buffer_used > q->buffer_limit) {
1819                         dropped++;
1820                         cake_drop(sch, to_free);
1821                 }
1822                 b->drop_overlimit += dropped;
1823         }
1824         return NET_XMIT_SUCCESS;
1825 }
1826 
1827 static struct sk_buff *cake_dequeue_one(struct Qdisc *sch)
1828 {
1829         struct cake_sched_data *q = qdisc_priv(sch);
1830         struct cake_tin_data *b = &q->tins[q->cur_tin];
1831         struct cake_flow *flow = &b->flows[q->cur_flow];
1832         struct sk_buff *skb = NULL;
1833         u32 len;
1834 
1835         if (flow->head) {
1836                 skb = dequeue_head(flow);
1837                 len = qdisc_pkt_len(skb);
1838                 b->backlogs[q->cur_flow] -= len;
1839                 b->tin_backlog           -= len;
1840                 sch->qstats.backlog      -= len;
1841                 q->buffer_used           -= skb->truesize;
1842                 sch->q.qlen--;
1843 
1844                 if (q->overflow_timeout)
1845                         cake_heapify(q, b->overflow_idx[q->cur_flow]);
1846         }
1847         return skb;
1848 }
1849 
1850 /* Discard leftover packets from a tin no longer in use. */
1851 static void cake_clear_tin(struct Qdisc *sch, u16 tin)
1852 {
1853         struct cake_sched_data *q = qdisc_priv(sch);
1854         struct sk_buff *skb;
1855 
1856         q->cur_tin = tin;
1857         for (q->cur_flow = 0; q->cur_flow < CAKE_QUEUES; q->cur_flow++)
1858                 while (!!(skb = cake_dequeue_one(sch)))
1859                         kfree_skb(skb);
1860 }
1861 
1862 static struct sk_buff *cake_dequeue(struct Qdisc *sch)
1863 {
1864         struct cake_sched_data *q = qdisc_priv(sch);
1865         struct cake_tin_data *b = &q->tins[q->cur_tin];
1866         struct cake_host *srchost, *dsthost;
1867         ktime_t now = ktime_get();
1868         struct cake_flow *flow;
1869         struct list_head *head;
1870         bool first_flow = true;
1871         struct sk_buff *skb;
1872         u16 host_load;
1873         u64 delay;
1874         u32 len;
1875 
1876 begin:
1877         if (!sch->q.qlen)
1878                 return NULL;
1879 
1880         /* global hard shaper */
1881         if (ktime_after(q->time_next_packet, now) &&
1882             ktime_after(q->failsafe_next_packet, now)) {
1883                 u64 next = min(ktime_to_ns(q->time_next_packet),
1884                                ktime_to_ns(q->failsafe_next_packet));
1885 
1886                 sch->qstats.overlimits++;
1887                 qdisc_watchdog_schedule_ns(&q->watchdog, next);
1888                 return NULL;
1889         }
1890 
1891         /* Choose a class to work on. */
1892         if (!q->rate_ns) {
1893                 /* In unlimited mode, can't rely on shaper timings, just balance
1894                  * with DRR
1895                  */
1896                 bool wrapped = false, empty = true;
1897 
1898                 while (b->tin_deficit < 0 ||
1899                        !(b->sparse_flow_count + b->bulk_flow_count)) {
1900                         if (b->tin_deficit <= 0)
1901                                 b->tin_deficit += b->tin_quantum_band;
1902                         if (b->sparse_flow_count + b->bulk_flow_count)
1903                                 empty = false;
1904 
1905                         q->cur_tin++;
1906                         b++;
1907                         if (q->cur_tin >= q->tin_cnt) {
1908                                 q->cur_tin = 0;
1909                                 b = q->tins;
1910 
1911                                 if (wrapped) {
1912                                         /* It's possible for q->qlen to be
1913                                          * nonzero when we actually have no
1914                                          * packets anywhere.
1915                                          */
1916                                         if (empty)
1917                                                 return NULL;
1918                                 } else {
1919                                         wrapped = true;
1920                                 }
1921                         }
1922                 }
1923         } else {
1924                 /* In shaped mode, choose:
1925                  * - Highest-priority tin with queue and meeting schedule, or
1926                  * - The earliest-scheduled tin with queue.
1927                  */
1928                 ktime_t best_time = KTIME_MAX;
1929                 int tin, best_tin = 0;
1930 
1931                 for (tin = 0; tin < q->tin_cnt; tin++) {
1932                         b = q->tins + tin;
1933                         if ((b->sparse_flow_count + b->bulk_flow_count) > 0) {
1934                                 ktime_t time_to_pkt = \
1935                                         ktime_sub(b->time_next_packet, now);
1936 
1937                                 if (ktime_to_ns(time_to_pkt) <= 0 ||
1938                                     ktime_compare(time_to_pkt,
1939                                                   best_time) <= 0) {
1940                                         best_time = time_to_pkt;
1941                                         best_tin = tin;
1942                                 }
1943                         }
1944                 }
1945 
1946                 q->cur_tin = best_tin;
1947                 b = q->tins + best_tin;
1948 
1949                 /* No point in going further if no packets to deliver. */
1950                 if (unlikely(!(b->sparse_flow_count + b->bulk_flow_count)))
1951                         return NULL;
1952         }
1953 
1954 retry:
1955         /* service this class */
1956         head = &b->decaying_flows;
1957         if (!first_flow || list_empty(head)) {
1958                 head = &b->new_flows;
1959                 if (list_empty(head)) {
1960                         head = &b->old_flows;
1961                         if (unlikely(list_empty(head))) {
1962                                 head = &b->decaying_flows;
1963                                 if (unlikely(list_empty(head)))
1964                                         goto begin;
1965                         }
1966                 }
1967         }
1968         flow = list_first_entry(head, struct cake_flow, flowchain);
1969         q->cur_flow = flow - b->flows;
1970         first_flow = false;
1971 
1972         /* triple isolation (modified DRR++) */
1973         srchost = &b->hosts[flow->srchost];
1974         dsthost = &b->hosts[flow->dsthost];
1975         host_load = 1;
1976 
1977         if (cake_dsrc(q->flow_mode))
1978                 host_load = max(host_load, srchost->srchost_refcnt);
1979 
1980         if (cake_ddst(q->flow_mode))
1981                 host_load = max(host_load, dsthost->dsthost_refcnt);
1982 
1983         WARN_ON(host_load > CAKE_QUEUES);
1984 
1985         /* flow isolation (DRR++) */
1986         if (flow->deficit <= 0) {
1987                 /* The shifted prandom_u32() is a way to apply dithering to
1988                  * avoid accumulating roundoff errors
1989                  */
1990                 flow->deficit += (b->flow_quantum * quantum_div[host_load] +
1991                                   (prandom_u32() >> 16)) >> 16;
1992                 list_move_tail(&flow->flowchain, &b->old_flows);
1993 
1994                 /* Keep all flows with deficits out of the sparse and decaying
1995                  * rotations.  No non-empty flow can go into the decaying
1996                  * rotation, so they can't get deficits
1997                  */
1998                 if (flow->set == CAKE_SET_SPARSE) {
1999                         if (flow->head) {
2000                                 b->sparse_flow_count--;
2001                                 b->bulk_flow_count++;
2002                                 flow->set = CAKE_SET_BULK;
2003                         } else {
2004                                 /* we've moved it to the bulk rotation for
2005                                  * correct deficit accounting but we still want
2006                                  * to count it as a sparse flow, not a bulk one.
2007                                  */
2008                                 flow->set = CAKE_SET_SPARSE_WAIT;
2009                         }
2010                 }
2011                 goto retry;
2012         }
2013 
2014         /* Retrieve a packet via the AQM */
2015         while (1) {
2016                 skb = cake_dequeue_one(sch);
2017                 if (!skb) {
2018                         /* this queue was actually empty */
2019                         if (cobalt_queue_empty(&flow->cvars, &b->cparams, now))
2020                                 b->unresponsive_flow_count--;
2021 
2022                         if (flow->cvars.p_drop || flow->cvars.count ||
2023                             ktime_before(now, flow->cvars.drop_next)) {
2024                                 /* keep in the flowchain until the state has
2025                                  * decayed to rest
2026                                  */
2027                                 list_move_tail(&flow->flowchain,
2028                                                &b->decaying_flows);
2029                                 if (flow->set == CAKE_SET_BULK) {
2030                                         b->bulk_flow_count--;
2031                                         b->decaying_flow_count++;
2032                                 } else if (flow->set == CAKE_SET_SPARSE ||
2033                                            flow->set == CAKE_SET_SPARSE_WAIT) {
2034                                         b->sparse_flow_count--;
2035                                         b->decaying_flow_count++;
2036                                 }
2037                                 flow->set = CAKE_SET_DECAYING;
2038                         } else {
2039                                 /* remove empty queue from the flowchain */
2040                                 list_del_init(&flow->flowchain);
2041                                 if (flow->set == CAKE_SET_SPARSE ||
2042                                     flow->set == CAKE_SET_SPARSE_WAIT)
2043                                         b->sparse_flow_count--;
2044                                 else if (flow->set == CAKE_SET_BULK)
2045                                         b->bulk_flow_count--;
2046                                 else
2047                                         b->decaying_flow_count--;
2048 
2049                                 flow->set = CAKE_SET_NONE;
2050                                 srchost->srchost_refcnt--;
2051                                 dsthost->dsthost_refcnt--;
2052                         }
2053                         goto begin;
2054                 }
2055 
2056                 /* Last packet in queue may be marked, shouldn't be dropped */
2057                 if (!cobalt_should_drop(&flow->cvars, &b->cparams, now, skb,
2058                                         (b->bulk_flow_count *
2059                                          !!(q->rate_flags &
2060                                             CAKE_FLAG_INGRESS))) ||
2061                     !flow->head)
2062                         break;
2063 
2064                 /* drop this packet, get another one */
2065                 if (q->rate_flags & CAKE_FLAG_INGRESS) {
2066                         len = cake_advance_shaper(q, b, skb,
2067                                                   now, true);
2068                         flow->deficit -= len;
2069                         b->tin_deficit -= len;
2070                 }
2071                 flow->dropped++;
2072                 b->tin_dropped++;
2073                 qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
2074                 qdisc_qstats_drop(sch);
2075                 kfree_skb(skb);
2076                 if (q->rate_flags & CAKE_FLAG_INGRESS)
2077                         goto retry;
2078         }
2079 
2080         b->tin_ecn_mark += !!flow->cvars.ecn_marked;
2081         qdisc_bstats_update(sch, skb);
2082 
2083         /* collect delay stats */
2084         delay = ktime_to_ns(ktime_sub(now, cobalt_get_enqueue_time(skb)));
2085         b->avge_delay = cake_ewma(b->avge_delay, delay, 8);
2086         b->peak_delay = cake_ewma(b->peak_delay, delay,
2087                                   delay > b->peak_delay ? 2 : 8);
2088         b->base_delay = cake_ewma(b->base_delay, delay,
2089                                   delay < b->base_delay ? 2 : 8);
2090 
2091         len = cake_advance_shaper(q, b, skb, now, false);
2092         flow->deficit -= len;
2093         b->tin_deficit -= len;
2094 
2095         if (ktime_after(q->time_next_packet, now) && sch->q.qlen) {
2096                 u64 next = min(ktime_to_ns(q->time_next_packet),
2097                                ktime_to_ns(q->failsafe_next_packet));
2098 
2099                 qdisc_watchdog_schedule_ns(&q->watchdog, next);
2100         } else if (!sch->q.qlen) {
2101                 int i;
2102 
2103                 for (i = 0; i < q->tin_cnt; i++) {
2104                         if (q->tins[i].decaying_flow_count) {
2105                                 ktime_t next = \
2106                                         ktime_add_ns(now,
2107                                                      q->tins[i].cparams.target);
2108 
2109                                 qdisc_watchdog_schedule_ns(&q->watchdog,
2110                                                            ktime_to_ns(next));
2111                                 break;
2112                         }
2113                 }
2114         }
2115 
2116         if (q->overflow_timeout)
2117                 q->overflow_timeout--;
2118 
2119         return skb;
2120 }
2121 
2122 static void cake_reset(struct Qdisc *sch)
2123 {
2124         u32 c;
2125 
2126         for (c = 0; c < CAKE_MAX_TINS; c++)
2127                 cake_clear_tin(sch, c);
2128 }
2129 
2130 static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = {
2131         [TCA_CAKE_BASE_RATE64]   = { .type = NLA_U64 },
2132         [TCA_CAKE_DIFFSERV_MODE] = { .type = NLA_U32 },
2133         [TCA_CAKE_ATM]           = { .type = NLA_U32 },
2134         [TCA_CAKE_FLOW_MODE]     = { .type = NLA_U32 },
2135         [TCA_CAKE_OVERHEAD]      = { .type = NLA_S32 },
2136         [TCA_CAKE_RTT]           = { .type = NLA_U32 },
2137         [TCA_CAKE_TARGET]        = { .type = NLA_U32 },
2138         [TCA_CAKE_AUTORATE]      = { .type = NLA_U32 },
2139         [TCA_CAKE_MEMORY]        = { .type = NLA_U32 },
2140         [TCA_CAKE_NAT]           = { .type = NLA_U32 },
2141         [TCA_CAKE_RAW]           = { .type = NLA_U32 },
2142         [TCA_CAKE_WASH]          = { .type = NLA_U32 },
2143         [TCA_CAKE_MPU]           = { .type = NLA_U32 },
2144         [TCA_CAKE_INGRESS]       = { .type = NLA_U32 },
2145         [TCA_CAKE_ACK_FILTER]    = { .type = NLA_U32 },
2146 };
2147 
2148 static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu,
2149                           u64 target_ns, u64 rtt_est_ns)
2150 {
2151         /* convert byte-rate into time-per-byte
2152          * so it will always unwedge in reasonable time.
2153          */
2154         static const u64 MIN_RATE = 64;
2155         u32 byte_target = mtu;
2156         u64 byte_target_ns;
2157         u8  rate_shft = 0;
2158         u64 rate_ns = 0;
2159 
2160         b->flow_quantum = 1514;
2161         if (rate) {
2162                 b->flow_quantum = max(min(rate >> 12, 1514ULL), 300ULL);
2163                 rate_shft = 34;
2164                 rate_ns = ((u64)NSEC_PER_SEC) << rate_shft;
2165                 rate_ns = div64_u64(rate_ns, max(MIN_RATE, rate));
2166                 while (!!(rate_ns >> 34)) {
2167                         rate_ns >>= 1;
2168                         rate_shft--;
2169                 }
2170         } /* else unlimited, ie. zero delay */
2171 
2172         b->tin_rate_bps  = rate;
2173         b->tin_rate_ns   = rate_ns;
2174         b->tin_rate_shft = rate_shft;
2175 
2176         byte_target_ns = (byte_target * rate_ns) >> rate_shft;
2177 
2178         b->cparams.target = max((byte_target_ns * 3) / 2, target_ns);
2179         b->cparams.interval = max(rtt_est_ns +
2180                                      b->cparams.target - target_ns,
2181                                      b->cparams.target * 2);
2182         b->cparams.mtu_time = byte_target_ns;
2183         b->cparams.p_inc = 1 << 24; /* 1/256 */
2184         b->cparams.p_dec = 1 << 20; /* 1/4096 */
2185 }
2186 
2187 static int cake_config_besteffort(struct Qdisc *sch)
2188 {
2189         struct cake_sched_data *q = qdisc_priv(sch);
2190         struct cake_tin_data *b = &q->tins[0];
2191         u32 mtu = psched_mtu(qdisc_dev(sch));
2192         u64 rate = q->rate_bps;
2193 
2194         q->tin_cnt = 1;
2195 
2196         q->tin_index = besteffort;
2197         q->tin_order = normal_order;
2198 
2199         cake_set_rate(b, rate, mtu,
2200                       us_to_ns(q->target), us_to_ns(q->interval));
2201         b->tin_quantum_band = 65535;
2202         b->tin_quantum_prio = 65535;
2203 
2204         return 0;
2205 }
2206 
2207 static int cake_config_precedence(struct Qdisc *sch)
2208 {
2209         /* convert high-level (user visible) parameters into internal format */
2210         struct cake_sched_data *q = qdisc_priv(sch);
2211         u32 mtu = psched_mtu(qdisc_dev(sch));
2212         u64 rate = q->rate_bps;
2213         u32 quantum1 = 256;
2214         u32 quantum2 = 256;
2215         u32 i;
2216 
2217         q->tin_cnt = 8;
2218         q->tin_index = precedence;
2219         q->tin_order = normal_order;
2220 
2221         for (i = 0; i < q->tin_cnt; i++) {
2222                 struct cake_tin_data *b = &q->tins[i];
2223 
2224                 cake_set_rate(b, rate, mtu, us_to_ns(q->target),
2225                               us_to_ns(q->interval));
2226 
2227                 b->tin_quantum_prio = max_t(u16, 1U, quantum1);
2228                 b->tin_quantum_band = max_t(u16, 1U, quantum2);
2229 
2230                 /* calculate next class's parameters */
2231                 rate  *= 7;
2232                 rate >>= 3;
2233 
2234                 quantum1  *= 3;
2235                 quantum1 >>= 1;
2236 
2237                 quantum2  *= 7;
2238                 quantum2 >>= 3;
2239         }
2240 
2241         return 0;
2242 }
2243 
2244 /*      List of known Diffserv codepoints:
2245  *
2246  *      Least Effort (CS1)
2247  *      Best Effort (CS0)
2248  *      Max Reliability & LLT "Lo" (TOS1)
2249  *      Max Throughput (TOS2)
2250  *      Min Delay (TOS4)
2251  *      LLT "La" (TOS5)
2252  *      Assured Forwarding 1 (AF1x) - x3
2253  *      Assured Forwarding 2 (AF2x) - x3
2254  *      Assured Forwarding 3 (AF3x) - x3
2255  *      Assured Forwarding 4 (AF4x) - x3
2256  *      Precedence Class 2 (CS2)
2257  *      Precedence Class 3 (CS3)
2258  *      Precedence Class 4 (CS4)
2259  *      Precedence Class 5 (CS5)
2260  *      Precedence Class 6 (CS6)
2261  *      Precedence Class 7 (CS7)
2262  *      Voice Admit (VA)
2263  *      Expedited Forwarding (EF)
2264 
2265  *      Total 25 codepoints.
2266  */
2267 
2268 /*      List of traffic classes in RFC 4594:
2269  *              (roughly descending order of contended priority)
2270  *              (roughly ascending order of uncontended throughput)
2271  *
2272  *      Network Control (CS6,CS7)      - routing traffic
2273  *      Telephony (EF,VA)         - aka. VoIP streams
2274  *      Signalling (CS5)               - VoIP setup
2275  *      Multimedia Conferencing (AF4x) - aka. video calls
2276  *      Realtime Interactive (CS4)     - eg. games
2277  *      Multimedia Streaming (AF3x)    - eg. YouTube, NetFlix, Twitch
2278  *      Broadcast Video (CS3)
2279  *      Low Latency Data (AF2x,TOS4)      - eg. database
2280  *      Ops, Admin, Management (CS2,TOS1) - eg. ssh
2281  *      Standard Service (CS0 & unrecognised codepoints)
2282  *      High Throughput Data (AF1x,TOS2)  - eg. web traffic
2283  *      Low Priority Data (CS1)           - eg. BitTorrent
2284 
2285  *      Total 12 traffic classes.
2286  */
2287 
2288 static int cake_config_diffserv8(struct Qdisc *sch)
2289 {
2290 /*      Pruned list of traffic classes for typical applications:
2291  *
2292  *              Network Control          (CS6, CS7)
2293  *              Minimum Latency          (EF, VA, CS5, CS4)
2294  *              Interactive Shell        (CS2, TOS1)
2295  *              Low Latency Transactions (AF2x, TOS4)
2296  *              Video Streaming          (AF4x, AF3x, CS3)
2297  *              Bog Standard             (CS0 etc.)
2298  *              High Throughput          (AF1x, TOS2)
2299  *              Background Traffic       (CS1)
2300  *
2301  *              Total 8 traffic classes.
2302  */
2303 
2304         struct cake_sched_data *q = qdisc_priv(sch);
2305         u32 mtu = psched_mtu(qdisc_dev(sch));
2306         u64 rate = q->rate_bps;
2307         u32 quantum1 = 256;
2308         u32 quantum2 = 256;
2309         u32 i;
2310 
2311         q->tin_cnt = 8;
2312 
2313         /* codepoint to class mapping */
2314         q->tin_index = diffserv8;
2315         q->tin_order = normal_order;
2316 
2317         /* class characteristics */
2318         for (i = 0; i < q->tin_cnt; i++) {
2319                 struct cake_tin_data *b = &q->tins[i];
2320 
2321                 cake_set_rate(b, rate, mtu, us_to_ns(q->target),
2322                               us_to_ns(q->interval));
2323 
2324                 b->tin_quantum_prio = max_t(u16, 1U, quantum1);
2325                 b->tin_quantum_band = max_t(u16, 1U, quantum2);
2326 
2327                 /* calculate next class's parameters */
2328                 rate  *= 7;
2329                 rate >>= 3;
2330 
2331                 quantum1  *= 3;
2332                 quantum1 >>= 1;
2333 
2334                 quantum2  *= 7;
2335                 quantum2 >>= 3;
2336         }
2337 
2338         return 0;
2339 }
2340 
2341 static int cake_config_diffserv4(struct Qdisc *sch)
2342 {
2343 /*  Further pruned list of traffic classes for four-class system:
2344  *
2345  *          Latency Sensitive  (CS7, CS6, EF, VA, CS5, CS4)
2346  *          Streaming Media    (AF4x, AF3x, CS3, AF2x, TOS4, CS2, TOS1)
2347  *          Best Effort        (CS0, AF1x, TOS2, and those not specified)
2348  *          Background Traffic (CS1)
2349  *
2350  *              Total 4 traffic classes.
2351  */
2352 
2353         struct cake_sched_data *q = qdisc_priv(sch);
2354         u32 mtu = psched_mtu(qdisc_dev(sch));
2355         u64 rate = q->rate_bps;
2356         u32 quantum = 1024;
2357 
2358         q->tin_cnt = 4;
2359 
2360         /* codepoint to class mapping */
2361         q->tin_index = diffserv4;
2362         q->tin_order = bulk_order;
2363 
2364         /* class characteristics */
2365         cake_set_rate(&q->tins[0], rate, mtu,
2366                       us_to_ns(q->target), us_to_ns(q->interval));
2367         cake_set_rate(&q->tins[1], rate >> 4, mtu,
2368                       us_to_ns(q->target), us_to_ns(q->interval));
2369         cake_set_rate(&q->tins[2], rate >> 1, mtu,
2370                       us_to_ns(q->target), us_to_ns(q->interval));
2371         cake_set_rate(&q->tins[3], rate >> 2, mtu,
2372                       us_to_ns(q->target), us_to_ns(q->interval));
2373 
2374         /* priority weights */
2375         q->tins[0].tin_quantum_prio = quantum;
2376         q->tins[1].tin_quantum_prio = quantum >> 4;
2377         q->tins[2].tin_quantum_prio = quantum << 2;
2378         q->tins[3].tin_quantum_prio = quantum << 4;
2379 
2380         /* bandwidth-sharing weights */
2381         q->tins[0].tin_quantum_band = quantum;
2382         q->tins[1].tin_quantum_band = quantum >> 4;
2383         q->tins[2].tin_quantum_band = quantum >> 1;
2384         q->tins[3].tin_quantum_band = quantum >> 2;
2385 
2386         return 0;
2387 }
2388 
2389 static int cake_config_diffserv3(struct Qdisc *sch)
2390 {
2391 /*  Simplified Diffserv structure with 3 tins.
2392  *              Low Priority            (CS1)
2393  *              Best Effort
2394  *              Latency Sensitive       (TOS4, VA, EF, CS6, CS7)
2395  */
2396         struct cake_sched_data *q = qdisc_priv(sch);
2397         u32 mtu = psched_mtu(qdisc_dev(sch));
2398         u64 rate = q->rate_bps;
2399         u32 quantum = 1024;
2400 
2401         q->tin_cnt = 3;
2402 
2403         /* codepoint to class mapping */
2404         q->tin_index = diffserv3;
2405         q->tin_order = bulk_order;
2406 
2407         /* class characteristics */
2408         cake_set_rate(&q->tins[0], rate, mtu,
2409                       us_to_ns(q->target), us_to_ns(q->interval));
2410         cake_set_rate(&q->tins[1], rate >> 4, mtu,
2411                       us_to_ns(q->target), us_to_ns(q->interval));
2412         cake_set_rate(&q->tins[2], rate >> 2, mtu,
2413                       us_to_ns(q->target), us_to_ns(q->interval));
2414 
2415         /* priority weights */
2416         q->tins[0].tin_quantum_prio = quantum;
2417         q->tins[1].tin_quantum_prio = quantum >> 4;
2418         q->tins[2].tin_quantum_prio = quantum << 4;
2419 
2420         /* bandwidth-sharing weights */
2421         q->tins[0].tin_quantum_band = quantum;
2422         q->tins[1].tin_quantum_band = quantum >> 4;
2423         q->tins[2].tin_quantum_band = quantum >> 2;
2424 
2425         return 0;
2426 }
2427 
2428 static void cake_reconfigure(struct Qdisc *sch)
2429 {
2430         struct cake_sched_data *q = qdisc_priv(sch);
2431         int c, ft;
2432 
2433         switch (q->tin_mode) {
2434         case CAKE_DIFFSERV_BESTEFFORT:
2435                 ft = cake_config_besteffort(sch);
2436                 break;
2437 
2438         case CAKE_DIFFSERV_PRECEDENCE:
2439                 ft = cake_config_precedence(sch);
2440                 break;
2441 
2442         case CAKE_DIFFSERV_DIFFSERV8:
2443                 ft = cake_config_diffserv8(sch);
2444                 break;
2445 
2446         case CAKE_DIFFSERV_DIFFSERV4:
2447                 ft = cake_config_diffserv4(sch);
2448                 break;
2449 
2450         case CAKE_DIFFSERV_DIFFSERV3:
2451         default:
2452                 ft = cake_config_diffserv3(sch);
2453                 break;
2454         }
2455 
2456         for (c = q->tin_cnt; c < CAKE_MAX_TINS; c++) {
2457                 cake_clear_tin(sch, c);
2458                 q->tins[c].cparams.mtu_time = q->tins[ft].cparams.mtu_time;
2459         }
2460 
2461         q->rate_ns   = q->tins[ft].tin_rate_ns;
2462         q->rate_shft = q->tins[ft].tin_rate_shft;
2463 
2464         if (q->buffer_config_limit) {
2465                 q->buffer_limit = q->buffer_config_limit;
2466         } else if (q->rate_bps) {
2467                 u64 t = q->rate_bps * q->interval;
2468 
2469                 do_div(t, USEC_PER_SEC / 4);
2470                 q->buffer_limit = max_t(u32, t, 4U << 20);
2471         } else {
2472                 q->buffer_limit = ~0;
2473         }
2474 
2475         sch->flags &= ~TCQ_F_CAN_BYPASS;
2476 
2477         q->buffer_limit = min(q->buffer_limit,
2478                               max(sch->limit * psched_mtu(qdisc_dev(sch)),
2479                                   q->buffer_config_limit));
2480 }
2481 
2482 static int cake_change(struct Qdisc *sch, struct nlattr *opt,
2483                        struct netlink_ext_ack *extack)
2484 {
2485         struct cake_sched_data *q = qdisc_priv(sch);
2486         struct nlattr *tb[TCA_CAKE_MAX + 1];
2487         int err;
2488 
2489         if (!opt)
2490                 return -EINVAL;
2491 
2492         err = nla_parse_nested(tb, TCA_CAKE_MAX, opt, cake_policy, extack);
2493         if (err < 0)
2494                 return err;
2495 
2496         if (tb[TCA_CAKE_NAT]) {
2497 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
2498                 q->flow_mode &= ~CAKE_FLOW_NAT_FLAG;
2499                 q->flow_mode |= CAKE_FLOW_NAT_FLAG *
2500                         !!nla_get_u32(tb[TCA_CAKE_NAT]);
2501 #else
2502                 NL_SET_ERR_MSG_ATTR(extack, tb[TCA_CAKE_NAT],
2503                                     "No conntrack support in kernel");
2504                 return -EOPNOTSUPP;
2505 #endif
2506         }
2507 
2508         if (tb[TCA_CAKE_BASE_RATE64])
2509                 q->rate_bps = nla_get_u64(tb[TCA_CAKE_BASE_RATE64]);
2510 
2511         if (tb[TCA_CAKE_DIFFSERV_MODE])
2512                 q->tin_mode = nla_get_u32(tb[TCA_CAKE_DIFFSERV_MODE]);
2513 
2514         if (tb[TCA_CAKE_WASH]) {
2515                 if (!!nla_get_u32(tb[TCA_CAKE_WASH]))
2516                         q->rate_flags |= CAKE_FLAG_WASH;
2517                 else
2518                         q->rate_flags &= ~CAKE_FLAG_WASH;
2519         }
2520 
2521         if (tb[TCA_CAKE_FLOW_MODE])
2522                 q->flow_mode = ((q->flow_mode & CAKE_FLOW_NAT_FLAG) |
2523                                 (nla_get_u32(tb[TCA_CAKE_FLOW_MODE]) &
2524                                         CAKE_FLOW_MASK));
2525 
2526         if (tb[TCA_CAKE_ATM])
2527                 q->atm_mode = nla_get_u32(tb[TCA_CAKE_ATM]);
2528 
2529         if (tb[TCA_CAKE_OVERHEAD]) {
2530                 q->rate_overhead = nla_get_s32(tb[TCA_CAKE_OVERHEAD]);
2531                 q->rate_flags |= CAKE_FLAG_OVERHEAD;
2532 
2533                 q->max_netlen = 0;
2534                 q->max_adjlen = 0;
2535                 q->min_netlen = ~0;
2536                 q->min_adjlen = ~0;
2537         }
2538 
2539         if (tb[TCA_CAKE_RAW]) {
2540                 q->rate_flags &= ~CAKE_FLAG_OVERHEAD;
2541 
2542                 q->max_netlen = 0;
2543                 q->max_adjlen = 0;
2544                 q->min_netlen = ~0;
2545                 q->min_adjlen = ~0;
2546         }
2547 
2548         if (tb[TCA_CAKE_MPU])
2549                 q->rate_mpu = nla_get_u32(tb[TCA_CAKE_MPU]);
2550 
2551         if (tb[TCA_CAKE_RTT]) {
2552                 q->interval = nla_get_u32(tb[TCA_CAKE_RTT]);
2553 
2554                 if (!q->interval)
2555                         q->interval = 1;
2556         }
2557 
2558         if (tb[TCA_CAKE_TARGET]) {
2559                 q->target = nla_get_u32(tb[TCA_CAKE_TARGET]);
2560 
2561                 if (!q->target)
2562                         q->target = 1;
2563         }
2564 
2565         if (tb[TCA_CAKE_AUTORATE]) {
2566                 if (!!nla_get_u32(tb[TCA_CAKE_AUTORATE]))
2567                         q->rate_flags |= CAKE_FLAG_AUTORATE_INGRESS;
2568                 else
2569                         q->rate_flags &= ~CAKE_FLAG_AUTORATE_INGRESS;
2570         }
2571 
2572         if (tb[TCA_CAKE_INGRESS]) {
2573                 if (!!nla_get_u32(tb[TCA_CAKE_INGRESS]))
2574                         q->rate_flags |= CAKE_FLAG_INGRESS;
2575                 else
2576                         q->rate_flags &= ~CAKE_FLAG_INGRESS;
2577         }
2578 
2579         if (tb[TCA_CAKE_ACK_FILTER])
2580                 q->ack_filter = nla_get_u32(tb[TCA_CAKE_ACK_FILTER]);
2581 
2582         if (tb[TCA_CAKE_MEMORY])
2583                 q->buffer_config_limit = nla_get_u32(tb[TCA_CAKE_MEMORY]);
2584 
2585         if (tb[TCA_CAKE_SPLIT_GSO]) {
2586                 if (!!nla_get_u32(tb[TCA_CAKE_SPLIT_GSO]))
2587                         q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
2588                 else
2589                         q->rate_flags &= ~CAKE_FLAG_SPLIT_GSO;
2590         }
2591 
2592         if (q->tins) {
2593                 sch_tree_lock(sch);
2594                 cake_reconfigure(sch);
2595                 sch_tree_unlock(sch);
2596         }
2597 
2598         return 0;
2599 }
2600 
2601 static void cake_destroy(struct Qdisc *sch)
2602 {
2603         struct cake_sched_data *q = qdisc_priv(sch);
2604 
2605         qdisc_watchdog_cancel(&q->watchdog);
2606         tcf_block_put(q->block);
2607         kvfree(q->tins);
2608 }
2609 
2610 static int cake_init(struct Qdisc *sch, struct nlattr *opt,
2611                      struct netlink_ext_ack *extack)
2612 {
2613         struct cake_sched_data *q = qdisc_priv(sch);
2614         int i, j, err;
2615 
2616         sch->limit = 10240;
2617         q->tin_mode = CAKE_DIFFSERV_DIFFSERV3;
2618         q->flow_mode  = CAKE_FLOW_TRIPLE;
2619 
2620         q->rate_bps = 0; /* unlimited by default */
2621 
2622         q->interval = 100000; /* 100ms default */
2623         q->target   =   5000; /* 5ms: codel RFC argues
2624                                * for 5 to 10% of interval
2625                                */
2626         q->rate_flags |= CAKE_FLAG_SPLIT_GSO;
2627         q->cur_tin = 0;
2628         q->cur_flow  = 0;
2629 
2630         qdisc_watchdog_init(&q->watchdog, sch);
2631 
2632         if (opt) {
2633                 int err = cake_change(sch, opt, extack);
2634 
2635                 if (err)
2636                         return err;
2637         }
2638 
2639         err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
2640         if (err)
2641                 return err;
2642 
2643         quantum_div[0] = ~0;
2644         for (i = 1; i <= CAKE_QUEUES; i++)
2645                 quantum_div[i] = 65535 / i;
2646 
2647         q->tins = kvcalloc(CAKE_MAX_TINS, sizeof(struct cake_tin_data),
2648                            GFP_KERNEL);
2649         if (!q->tins)
2650                 goto nomem;
2651 
2652         for (i = 0; i < CAKE_MAX_TINS; i++) {
2653                 struct cake_tin_data *b = q->tins + i;
2654 
2655                 INIT_LIST_HEAD(&b->new_flows);
2656                 INIT_LIST_HEAD(&b->old_flows);
2657                 INIT_LIST_HEAD(&b->decaying_flows);
2658                 b->sparse_flow_count = 0;
2659                 b->bulk_flow_count = 0;
2660                 b->decaying_flow_count = 0;
2661 
2662                 for (j = 0; j < CAKE_QUEUES; j++) {
2663                         struct cake_flow *flow = b->flows + j;
2664                         u32 k = j * CAKE_MAX_TINS + i;
2665 
2666                         INIT_LIST_HEAD(&flow->flowchain);
2667                         cobalt_vars_init(&flow->cvars);
2668 
2669                         q->overflow_heap[k].t = i;
2670                         q->overflow_heap[k].b = j;
2671                         b->overflow_idx[j] = k;
2672                 }
2673         }
2674 
2675         cake_reconfigure(sch);
2676         q->avg_peak_bandwidth = q->rate_bps;
2677         q->min_netlen = ~0;
2678         q->min_adjlen = ~0;
2679         return 0;
2680 
2681 nomem:
2682         cake_destroy(sch);
2683         return -ENOMEM;
2684 }
2685 
2686 static int cake_dump(struct Qdisc *sch, struct sk_buff *skb)
2687 {
2688         struct cake_sched_data *q = qdisc_priv(sch);
2689         struct nlattr *opts;
2690 
2691         opts = nla_nest_start(skb, TCA_OPTIONS);
2692         if (!opts)
2693                 goto nla_put_failure;
2694 
2695         if (nla_put_u64_64bit(skb, TCA_CAKE_BASE_RATE64, q->rate_bps,
2696                               TCA_CAKE_PAD))
2697                 goto nla_put_failure;
2698 
2699         if (nla_put_u32(skb, TCA_CAKE_FLOW_MODE,
2700                         q->flow_mode & CAKE_FLOW_MASK))
2701                 goto nla_put_failure;
2702 
2703         if (nla_put_u32(skb, TCA_CAKE_RTT, q->interval))
2704                 goto nla_put_failure;
2705 
2706         if (nla_put_u32(skb, TCA_CAKE_TARGET, q->target))
2707                 goto nla_put_failure;
2708 
2709         if (nla_put_u32(skb, TCA_CAKE_MEMORY, q->buffer_config_limit))
2710                 goto nla_put_failure;
2711 
2712         if (nla_put_u32(skb, TCA_CAKE_AUTORATE,
2713                         !!(q->rate_flags & CAKE_FLAG_AUTORATE_INGRESS)))
2714                 goto nla_put_failure;
2715 
2716         if (nla_put_u32(skb, TCA_CAKE_INGRESS,
2717                         !!(q->rate_flags & CAKE_FLAG_INGRESS)))
2718                 goto nla_put_failure;
2719 
2720         if (nla_put_u32(skb, TCA_CAKE_ACK_FILTER, q->ack_filter))
2721                 goto nla_put_failure;
2722 
2723         if (nla_put_u32(skb, TCA_CAKE_NAT,
2724                         !!(q->flow_mode & CAKE_FLOW_NAT_FLAG)))
2725                 goto nla_put_failure;
2726 
2727         if (nla_put_u32(skb, TCA_CAKE_DIFFSERV_MODE, q->tin_mode))
2728                 goto nla_put_failure;
2729 
2730         if (nla_put_u32(skb, TCA_CAKE_WASH,
2731                         !!(q->rate_flags & CAKE_FLAG_WASH)))
2732                 goto nla_put_failure;
2733 
2734         if (nla_put_u32(skb, TCA_CAKE_OVERHEAD, q->rate_overhead))
2735                 goto nla_put_failure;
2736 
2737         if (!(q->rate_flags & CAKE_FLAG_OVERHEAD))
2738                 if (nla_put_u32(skb, TCA_CAKE_RAW, 0))
2739                         goto nla_put_failure;
2740 
2741         if (nla_put_u32(skb, TCA_CAKE_ATM, q->atm_mode))
2742                 goto nla_put_failure;
2743 
2744         if (nla_put_u32(skb, TCA_CAKE_MPU, q->rate_mpu))
2745                 goto nla_put_failure;
2746 
2747         if (nla_put_u32(skb, TCA_CAKE_SPLIT_GSO,
2748                         !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO)))
2749                 goto nla_put_failure;
2750 
2751         return nla_nest_end(skb, opts);
2752 
2753 nla_put_failure:
2754         return -1;
2755 }
2756 
2757 static int cake_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
2758 {
2759         struct nlattr *stats = nla_nest_start(d->skb, TCA_STATS_APP);
2760         struct cake_sched_data *q = qdisc_priv(sch);
2761         struct nlattr *tstats, *ts;
2762         int i;
2763 
2764         if (!stats)
2765                 return -1;
2766 
2767 #define PUT_STAT_U32(attr, data) do {                                  \
2768                 if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
2769                         goto nla_put_failure;                          \
2770         } while (0)
2771 #define PUT_STAT_U64(attr, data) do {                                  \
2772                 if (nla_put_u64_64bit(d->skb, TCA_CAKE_STATS_ ## attr, \
2773                                         data, TCA_CAKE_STATS_PAD)) \
2774                         goto nla_put_failure;                          \
2775         } while (0)
2776 
2777         PUT_STAT_U64(CAPACITY_ESTIMATE64, q->avg_peak_bandwidth);
2778         PUT_STAT_U32(MEMORY_LIMIT, q->buffer_limit);
2779         PUT_STAT_U32(MEMORY_USED, q->buffer_max_used);
2780         PUT_STAT_U32(AVG_NETOFF, ((q->avg_netoff + 0x8000) >> 16));
2781         PUT_STAT_U32(MAX_NETLEN, q->max_netlen);
2782         PUT_STAT_U32(MAX_ADJLEN, q->max_adjlen);
2783         PUT_STAT_U32(MIN_NETLEN, q->min_netlen);
2784         PUT_STAT_U32(MIN_ADJLEN, q->min_adjlen);
2785 
2786 #undef PUT_STAT_U32
2787 #undef PUT_STAT_U64
2788 
2789         tstats = nla_nest_start(d->skb, TCA_CAKE_STATS_TIN_STATS);
2790         if (!tstats)
2791                 goto nla_put_failure;
2792 
2793 #define PUT_TSTAT_U32(attr, data) do {                                  \
2794                 if (nla_put_u32(d->skb, TCA_CAKE_TIN_STATS_ ## attr, data)) \
2795                         goto nla_put_failure;                           \
2796         } while (0)
2797 #define PUT_TSTAT_U64(attr, data) do {                                  \
2798                 if (nla_put_u64_64bit(d->skb, TCA_CAKE_TIN_STATS_ ## attr, \
2799                                         data, TCA_CAKE_TIN_STATS_PAD))  \
2800                         goto nla_put_failure;                           \
2801         } while (0)
2802 
2803         for (i = 0; i < q->tin_cnt; i++) {
2804                 struct cake_tin_data *b = &q->tins[q->tin_order[i]];
2805 
2806                 ts = nla_nest_start(d->skb, i + 1);
2807                 if (!ts)
2808                         goto nla_put_failure;
2809 
2810                 PUT_TSTAT_U64(THRESHOLD_RATE64, b->tin_rate_bps);
2811                 PUT_TSTAT_U64(SENT_BYTES64, b->bytes);
2812                 PUT_TSTAT_U32(BACKLOG_BYTES, b->tin_backlog);
2813 
2814                 PUT_TSTAT_U32(TARGET_US,
2815                               ktime_to_us(ns_to_ktime(b->cparams.target)));
2816                 PUT_TSTAT_U32(INTERVAL_US,
2817                               ktime_to_us(ns_to_ktime(b->cparams.interval)));
2818 
2819                 PUT_TSTAT_U32(SENT_PACKETS, b->packets);
2820                 PUT_TSTAT_U32(DROPPED_PACKETS, b->tin_dropped);
2821                 PUT_TSTAT_U32(ECN_MARKED_PACKETS, b->tin_ecn_mark);
2822                 PUT_TSTAT_U32(ACKS_DROPPED_PACKETS, b->ack_drops);
2823 
2824                 PUT_TSTAT_U32(PEAK_DELAY_US,
2825                               ktime_to_us(ns_to_ktime(b->peak_delay)));
2826                 PUT_TSTAT_U32(AVG_DELAY_US,
2827                               ktime_to_us(ns_to_ktime(b->avge_delay)));
2828                 PUT_TSTAT_U32(BASE_DELAY_US,
2829                               ktime_to_us(ns_to_ktime(b->base_delay)));
2830 
2831                 PUT_TSTAT_U32(WAY_INDIRECT_HITS, b->way_hits);
2832                 PUT_TSTAT_U32(WAY_MISSES, b->way_misses);
2833                 PUT_TSTAT_U32(WAY_COLLISIONS, b->way_collisions);
2834 
2835                 PUT_TSTAT_U32(SPARSE_FLOWS, b->sparse_flow_count +
2836                                             b->decaying_flow_count);
2837                 PUT_TSTAT_U32(BULK_FLOWS, b->bulk_flow_count);
2838                 PUT_TSTAT_U32(UNRESPONSIVE_FLOWS, b->unresponsive_flow_count);
2839                 PUT_TSTAT_U32(MAX_SKBLEN, b->max_skblen);
2840 
2841                 PUT_TSTAT_U32(FLOW_QUANTUM, b->flow_quantum);
2842                 nla_nest_end(d->skb, ts);
2843         }
2844 
2845 #undef PUT_TSTAT_U32
2846 #undef PUT_TSTAT_U64
2847 
2848         nla_nest_end(d->skb, tstats);
2849         return nla_nest_end(d->skb, stats);
2850 
2851 nla_put_failure:
2852         nla_nest_cancel(d->skb, stats);
2853         return -1;
2854 }
2855 
2856 static struct Qdisc *cake_leaf(struct Qdisc *sch, unsigned long arg)
2857 {
2858         return NULL;
2859 }
2860 
2861 static unsigned long cake_find(struct Qdisc *sch, u32 classid)
2862 {
2863         return 0;
2864 }
2865 
2866 static unsigned long cake_bind(struct Qdisc *sch, unsigned long parent,
2867                                u32 classid)
2868 {
2869         return 0;
2870 }
2871 
2872 static void cake_unbind(struct Qdisc *q, unsigned long cl)
2873 {
2874 }
2875 
2876 static struct tcf_block *cake_tcf_block(struct Qdisc *sch, unsigned long cl,
2877                                         struct netlink_ext_ack *extack)
2878 {
2879         struct cake_sched_data *q = qdisc_priv(sch);
2880 
2881         if (cl)
2882                 return NULL;
2883         return q->block;
2884 }
2885 
2886 static int cake_dump_class(struct Qdisc *sch, unsigned long cl,
2887                            struct sk_buff *skb, struct tcmsg *tcm)
2888 {
2889         tcm->tcm_handle |= TC_H_MIN(cl);
2890         return 0;
2891 }
2892 
2893 static int cake_dump_class_stats(struct Qdisc *sch, unsigned long cl,
2894                                  struct gnet_dump *d)
2895 {
2896         struct cake_sched_data *q = qdisc_priv(sch);
2897         const struct cake_flow *flow = NULL;
2898         struct gnet_stats_queue qs = { 0 };
2899         struct nlattr *stats;
2900         u32 idx = cl - 1;
2901 
2902         if (idx < CAKE_QUEUES * q->tin_cnt) {
2903                 const struct cake_tin_data *b = \
2904                         &q->tins[q->tin_order[idx / CAKE_QUEUES]];
2905                 const struct sk_buff *skb;
2906 
2907                 flow = &b->flows[idx % CAKE_QUEUES];
2908 
2909                 if (flow->head) {
2910                         sch_tree_lock(sch);
2911                         skb = flow->head;
2912                         while (skb) {
2913                                 qs.qlen++;
2914                                 skb = skb->next;
2915                         }
2916                         sch_tree_unlock(sch);
2917                 }
2918                 qs.backlog = b->backlogs[idx % CAKE_QUEUES];
2919                 qs.drops = flow->dropped;
2920         }
2921         if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0)
2922                 return -1;
2923         if (flow) {
2924                 ktime_t now = ktime_get();
2925 
2926                 stats = nla_nest_start(d->skb, TCA_STATS_APP);
2927                 if (!stats)
2928                         return -1;
2929 
2930 #define PUT_STAT_U32(attr, data) do {                                  \
2931                 if (nla_put_u32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
2932                         goto nla_put_failure;                          \
2933         } while (0)
2934 #define PUT_STAT_S32(attr, data) do {                                  \
2935                 if (nla_put_s32(d->skb, TCA_CAKE_STATS_ ## attr, data)) \
2936                         goto nla_put_failure;                          \
2937         } while (0)
2938 
2939                 PUT_STAT_S32(DEFICIT, flow->deficit);
2940                 PUT_STAT_U32(DROPPING, flow->cvars.dropping);
2941                 PUT_STAT_U32(COBALT_COUNT, flow->cvars.count);
2942                 PUT_STAT_U32(P_DROP, flow->cvars.p_drop);
2943                 if (flow->cvars.p_drop) {
2944                         PUT_STAT_S32(BLUE_TIMER_US,
2945                                      ktime_to_us(
2946                                              ktime_sub(now,
2947                                                      flow->cvars.blue_timer)));
2948                 }
2949                 if (flow->cvars.dropping) {
2950                         PUT_STAT_S32(DROP_NEXT_US,
2951                                      ktime_to_us(
2952                                              ktime_sub(now,
2953                                                        flow->cvars.drop_next)));
2954                 }
2955 
2956                 if (nla_nest_end(d->skb, stats) < 0)
2957                         return -1;
2958         }
2959 
2960         return 0;
2961 
2962 nla_put_failure:
2963         nla_nest_cancel(d->skb, stats);
2964         return -1;
2965 }
2966 
2967 static void cake_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2968 {
2969         struct cake_sched_data *q = qdisc_priv(sch);
2970         unsigned int i, j;
2971 
2972         if (arg->stop)
2973                 return;
2974 
2975         for (i = 0; i < q->tin_cnt; i++) {
2976                 struct cake_tin_data *b = &q->tins[q->tin_order[i]];
2977 
2978                 for (j = 0; j < CAKE_QUEUES; j++) {
2979                         if (list_empty(&b->flows[j].flowchain) ||
2980                             arg->count < arg->skip) {
2981                                 arg->count++;
2982                                 continue;
2983                         }
2984                         if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) {
2985                                 arg->stop = 1;
2986                                 break;
2987                         }
2988                         arg->count++;
2989                 }
2990         }
2991 }
2992 
2993 static const struct Qdisc_class_ops cake_class_ops = {
2994         .leaf           =       cake_leaf,
2995         .find           =       cake_find,
2996         .tcf_block      =       cake_tcf_block,
2997         .bind_tcf       =       cake_bind,
2998         .unbind_tcf     =       cake_unbind,
2999         .dump           =       cake_dump_class,
3000         .dump_stats     =       cake_dump_class_stats,
3001         .walk           =       cake_walk,
3002 };
3003 
3004 static struct Qdisc_ops cake_qdisc_ops __read_mostly = {
3005         .cl_ops         =       &cake_class_ops,
3006         .id             =       "cake",
3007         .priv_size      =       sizeof(struct cake_sched_data),
3008         .enqueue        =       cake_enqueue,
3009         .dequeue        =       cake_dequeue,
3010         .peek           =       qdisc_peek_dequeued,
3011         .init           =       cake_init,
3012         .reset          =       cake_reset,
3013         .destroy        =       cake_destroy,
3014         .change         =       cake_change,
3015         .dump           =       cake_dump,
3016         .dump_stats     =       cake_dump_stats,
3017         .owner          =       THIS_MODULE,
3018 };
3019 
3020 static int __init cake_module_init(void)
3021 {
3022         return register_qdisc(&cake_qdisc_ops);
3023 }
3024 
3025 static void __exit cake_module_exit(void)
3026 {
3027         unregister_qdisc(&cake_qdisc_ops);
3028 }
3029 
3030 module_init(cake_module_init)
3031 module_exit(cake_module_exit)
3032 MODULE_AUTHOR("Jonathan Morton");
3033 MODULE_LICENSE("Dual BSD/GPL");
3034 MODULE_DESCRIPTION("The CAKE shaper.");
3035 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp