~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/kernel/irq/timings.c

Version: ~ [ linux-6.2-rc3 ] ~ [ linux-6.1.5 ] ~ [ linux-6.0.19 ] ~ [ linux-5.19.17 ] ~ [ linux-5.18.19 ] ~ [ linux-5.17.15 ] ~ [ linux-5.16.20 ] ~ [ linux-5.15.87 ] ~ [ linux-5.14.21 ] ~ [ linux-5.13.19 ] ~ [ linux-5.12.19 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.162 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.228 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.269 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.302 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.337 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.302 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.9 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 // SPDX-License-Identifier: GPL-2.0
  2 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
  3 
  4 #include <linux/kernel.h>
  5 #include <linux/percpu.h>
  6 #include <linux/slab.h>
  7 #include <linux/static_key.h>
  8 #include <linux/interrupt.h>
  9 #include <linux/idr.h>
 10 #include <linux/irq.h>
 11 #include <linux/math64.h>
 12 #include <linux/log2.h>
 13 
 14 #include <trace/events/irq.h>
 15 
 16 #include "internals.h"
 17 
 18 DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
 19 
 20 DEFINE_PER_CPU(struct irq_timings, irq_timings);
 21 
 22 static DEFINE_IDR(irqt_stats);
 23 
 24 void irq_timings_enable(void)
 25 {
 26         static_branch_enable(&irq_timing_enabled);
 27 }
 28 
 29 void irq_timings_disable(void)
 30 {
 31         static_branch_disable(&irq_timing_enabled);
 32 }
 33 
 34 /*
 35  * The main goal of this algorithm is to predict the next interrupt
 36  * occurrence on the current CPU.
 37  *
 38  * Currently, the interrupt timings are stored in a circular array
 39  * buffer every time there is an interrupt, as a tuple: the interrupt
 40  * number and the associated timestamp when the event occurred <irq,
 41  * timestamp>.
 42  *
 43  * For every interrupt occurring in a short period of time, we can
 44  * measure the elapsed time between the occurrences for the same
 45  * interrupt and we end up with a suite of intervals. The experience
 46  * showed the interrupts are often coming following a periodic
 47  * pattern.
 48  *
 49  * The objective of the algorithm is to find out this periodic pattern
 50  * in a fastest way and use its period to predict the next irq event.
 51  *
 52  * When the next interrupt event is requested, we are in the situation
 53  * where the interrupts are disabled and the circular buffer
 54  * containing the timings is filled with the events which happened
 55  * after the previous next-interrupt-event request.
 56  *
 57  * At this point, we read the circular buffer and we fill the irq
 58  * related statistics structure. After this step, the circular array
 59  * containing the timings is empty because all the values are
 60  * dispatched in their corresponding buffers.
 61  *
 62  * Now for each interrupt, we can predict the next event by using the
 63  * suffix array, log interval and exponential moving average
 64  *
 65  * 1. Suffix array
 66  *
 67  * Suffix array is an array of all the suffixes of a string. It is
 68  * widely used as a data structure for compression, text search, ...
 69  * For instance for the word 'banana', the suffixes will be: 'banana'
 70  * 'anana' 'nana' 'ana' 'na' 'a'
 71  *
 72  * Usually, the suffix array is sorted but for our purpose it is
 73  * not necessary and won't provide any improvement in the context of
 74  * the solved problem where we clearly define the boundaries of the
 75  * search by a max period and min period.
 76  *
 77  * The suffix array will build a suite of intervals of different
 78  * length and will look for the repetition of each suite. If the suite
 79  * is repeating then we have the period because it is the length of
 80  * the suite whatever its position in the buffer.
 81  *
 82  * 2. Log interval
 83  *
 84  * We saw the irq timings allow to compute the interval of the
 85  * occurrences for a specific interrupt. We can reasonibly assume the
 86  * longer is the interval, the higher is the error for the next event
 87  * and we can consider storing those interval values into an array
 88  * where each slot in the array correspond to an interval at the power
 89  * of 2 of the index. For example, index 12 will contain values
 90  * between 2^11 and 2^12.
 91  *
 92  * At the end we have an array of values where at each index defines a
 93  * [2^index - 1, 2 ^ index] interval values allowing to store a large
 94  * number of values inside a small array.
 95  *
 96  * For example, if we have the value 1123, then we store it at
 97  * ilog2(1123) = 10 index value.
 98  *
 99  * Storing those value at the specific index is done by computing an
100  * exponential moving average for this specific slot. For instance,
101  * for values 1800, 1123, 1453, ... fall under the same slot (10) and
102  * the exponential moving average is computed every time a new value
103  * is stored at this slot.
104  *
105  * 3. Exponential Moving Average
106  *
107  * The EMA is largely used to track a signal for stocks or as a low
108  * pass filter. The magic of the formula, is it is very simple and the
109  * reactivity of the average can be tuned with the factors called
110  * alpha.
111  *
112  * The higher the alphas are, the faster the average respond to the
113  * signal change. In our case, if a slot in the array is a big
114  * interval, we can have numbers with a big difference between
115  * them. The impact of those differences in the average computation
116  * can be tuned by changing the alpha value.
117  *
118  *
119  *  -- The algorithm --
120  *
121  * We saw the different processing above, now let's see how they are
122  * used together.
123  *
124  * For each interrupt:
125  *      For each interval:
126  *              Compute the index = ilog2(interval)
127  *              Compute a new_ema(buffer[index], interval)
128  *              Store the index in a circular buffer
129  *
130  *      Compute the suffix array of the indexes
131  *
132  *      For each suffix:
133  *              If the suffix is reverse-found 3 times
134  *                      Return suffix
135  *
136  *      Return Not found
137  *
138  * However we can not have endless suffix array to be build, it won't
139  * make sense and it will add an extra overhead, so we can restrict
140  * this to a maximum suffix length of 5 and a minimum suffix length of
141  * 2. The experience showed 5 is the majority of the maximum pattern
142  * period found for different devices.
143  *
144  * The result is a pattern finding less than 1us for an interrupt.
145  *
146  * Example based on real values:
147  *
148  * Example 1 : MMC write/read interrupt interval:
149  *
150  *      223947, 1240, 1384, 1386, 1386,
151  *      217416, 1236, 1384, 1386, 1387,
152  *      214719, 1241, 1386, 1387, 1384,
153  *      213696, 1234, 1384, 1386, 1388,
154  *      219904, 1240, 1385, 1389, 1385,
155  *      212240, 1240, 1386, 1386, 1386,
156  *      214415, 1236, 1384, 1386, 1387,
157  *      214276, 1234, 1384, 1388, ?
158  *
159  * For each element, apply ilog2(value)
160  *
161  *      15, 8, 8, 8, 8,
162  *      15, 8, 8, 8, 8,
163  *      15, 8, 8, 8, 8,
164  *      15, 8, 8, 8, 8,
165  *      15, 8, 8, 8, 8,
166  *      15, 8, 8, 8, 8,
167  *      15, 8, 8, 8, 8,
168  *      15, 8, 8, 8, ?
169  *
170  * Max period of 5, we take the last (max_period * 3) 15 elements as
171  * we can be confident if the pattern repeats itself three times it is
172  * a repeating pattern.
173  *
174  *                   8,
175  *      15, 8, 8, 8, 8,
176  *      15, 8, 8, 8, 8,
177  *      15, 8, 8, 8, ?
178  *
179  * Suffixes are:
180  *
181  *  1) 8, 15, 8, 8, 8  <- max period
182  *  2) 8, 15, 8, 8
183  *  3) 8, 15, 8
184  *  4) 8, 15           <- min period
185  *
186  * From there we search the repeating pattern for each suffix.
187  *
188  * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
189  *         |   |  |  |  |  |   |  |  |  |  |   |  |  |  |
190  *         8, 15, 8, 8, 8  |   |  |  |  |  |   |  |  |  |
191  *                         8, 15, 8, 8, 8  |   |  |  |  |
192  *                                         8, 15, 8, 8, 8
193  *
194  * When moving the suffix, we found exactly 3 matches.
195  *
196  * The first suffix with period 5 is repeating.
197  *
198  * The next event is (3 * max_period) % suffix_period
199  *
200  * In this example, the result 0, so the next event is suffix[0] => 8
201  *
202  * However, 8 is the index in the array of exponential moving average
203  * which was calculated on the fly when storing the values, so the
204  * interval is ema[8] = 1366
205  *
206  *
207  * Example 2:
208  *
209  *      4, 3, 5, 100,
210  *      3, 3, 5, 117,
211  *      4, 4, 5, 112,
212  *      4, 3, 4, 110,
213  *      3, 5, 3, 117,
214  *      4, 4, 5, 112,
215  *      4, 3, 4, 110,
216  *      3, 4, 5, 112,
217  *      4, 3, 4, 110
218  *
219  * ilog2
220  *
221  *      0, 0, 0, 4,
222  *      0, 0, 0, 4,
223  *      0, 0, 0, 4,
224  *      0, 0, 0, 4,
225  *      0, 0, 0, 4,
226  *      0, 0, 0, 4,
227  *      0, 0, 0, 4,
228  *      0, 0, 0, 4,
229  *      0, 0, 0, 4
230  *
231  * Max period 5:
232  *         0, 0, 4,
233  *      0, 0, 0, 4,
234  *      0, 0, 0, 4,
235  *      0, 0, 0, 4
236  *
237  * Suffixes:
238  *
239  *  1) 0, 0, 4, 0, 0
240  *  2) 0, 0, 4, 0
241  *  3) 0, 0, 4
242  *  4) 0, 0
243  *
244  * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
245  *         |  |  |  |  |  |  X
246  *         0, 0, 4, 0, 0, |  X
247  *                        0, 0
248  *
249  * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
250  *         |  |  |  |  |  |  |  |  |  |  |  |  |  |  |
251  *         0, 0, 4, 0, |  |  |  |  |  |  |  |  |  |  |
252  *                     0, 0, 4, 0, |  |  |  |  |  |  |
253  *                                 0, 0, 4, 0, |  |  |
254  *                                             0  0  4
255  *
256  * Pattern is found 3 times, the remaining is 1 which results from
257  * (max_period * 3) % suffix_period. This value is the index in the
258  * suffix arrays. The suffix array for a period 4 has the value 4
259  * at index 1.
260  */
261 #define EMA_ALPHA_VAL           64
262 #define EMA_ALPHA_SHIFT         7
263 
264 #define PREDICTION_PERIOD_MIN   2
265 #define PREDICTION_PERIOD_MAX   5
266 #define PREDICTION_FACTOR       4
267 #define PREDICTION_MAX          10 /* 2 ^ PREDICTION_MAX useconds */
268 #define PREDICTION_BUFFER_SIZE  16 /* slots for EMAs, hardly more than 16 */
269 
270 struct irqt_stat {
271         u64     last_ts;
272         u64     ema_time[PREDICTION_BUFFER_SIZE];
273         int     timings[IRQ_TIMINGS_SIZE];
274         int     circ_timings[IRQ_TIMINGS_SIZE];
275         int     count;
276 };
277 
278 /*
279  * Exponential moving average computation
280  */
281 static u64 irq_timings_ema_new(u64 value, u64 ema_old)
282 {
283         s64 diff;
284 
285         if (unlikely(!ema_old))
286                 return value;
287 
288         diff = (value - ema_old) * EMA_ALPHA_VAL;
289         /*
290          * We can use a s64 type variable to be added with the u64
291          * ema_old variable as this one will never have its topmost
292          * bit set, it will be always smaller than 2^63 nanosec
293          * interrupt interval (292 years).
294          */
295         return ema_old + (diff >> EMA_ALPHA_SHIFT);
296 }
297 
298 static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
299 {
300         int i;
301 
302         /*
303          * The buffer contains the suite of intervals, in a ilog2
304          * basis, we are looking for a repetition. We point the
305          * beginning of the search three times the length of the
306          * period beginning at the end of the buffer. We do that for
307          * each suffix.
308          */
309         for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
310 
311                 int *begin = &buffer[len - (i * 3)];
312                 int *ptr = begin;
313 
314                 /*
315                  * We look if the suite with period 'i' repeat
316                  * itself. If it is truncated at the end, as it
317                  * repeats we can use the period to find out the next
318                  * element.
319                  */
320                 while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
321                         ptr += i;
322                         if (ptr >= &buffer[len])
323                                 return begin[((i * 3) % i)];
324                 }
325         }
326 
327         return -1;
328 }
329 
330 static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
331 {
332         int index, i, period_max, count, start, min = INT_MAX;
333 
334         if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
335                 irqs->count = irqs->last_ts = 0;
336                 return U64_MAX;
337         }
338 
339         /*
340          * As we want to find three times the repetition, we need a
341          * number of intervals greater or equal to three times the
342          * maximum period, otherwise we truncate the max period.
343          */
344         period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
345                 PREDICTION_PERIOD_MAX : irqs->count / 3;
346 
347         /*
348          * If we don't have enough irq timings for this prediction,
349          * just bail out.
350          */
351         if (period_max <= PREDICTION_PERIOD_MIN)
352                 return U64_MAX;
353 
354         /*
355          * 'count' will depends if the circular buffer wrapped or not
356          */
357         count = irqs->count < IRQ_TIMINGS_SIZE ?
358                 irqs->count : IRQ_TIMINGS_SIZE;
359 
360         start = irqs->count < IRQ_TIMINGS_SIZE ?
361                 0 : (irqs->count & IRQ_TIMINGS_MASK);
362 
363         /*
364          * Copy the content of the circular buffer into another buffer
365          * in order to linearize the buffer instead of dealing with
366          * wrapping indexes and shifted array which will be prone to
367          * error and extremelly difficult to debug.
368          */
369         for (i = 0; i < count; i++) {
370                 int index = (start + i) & IRQ_TIMINGS_MASK;
371 
372                 irqs->timings[i] = irqs->circ_timings[index];
373                 min = min_t(int, irqs->timings[i], min);
374         }
375 
376         index = irq_timings_next_event_index(irqs->timings, count, period_max);
377         if (index < 0)
378                 return irqs->last_ts + irqs->ema_time[min];
379 
380         return irqs->last_ts + irqs->ema_time[index];
381 }
382 
383 static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
384 {
385         u64 old_ts = irqs->last_ts;
386         u64 interval;
387         int index;
388 
389         /*
390          * The timestamps are absolute time values, we need to compute
391          * the timing interval between two interrupts.
392          */
393         irqs->last_ts = ts;
394 
395         /*
396          * The interval type is u64 in order to deal with the same
397          * type in our computation, that prevent mindfuck issues with
398          * overflow, sign and division.
399          */
400         interval = ts - old_ts;
401 
402         /*
403          * The interrupt triggered more than one second apart, that
404          * ends the sequence as predictible for our purpose. In this
405          * case, assume we have the beginning of a sequence and the
406          * timestamp is the first value. As it is impossible to
407          * predict anything at this point, return.
408          *
409          * Note the first timestamp of the sequence will always fall
410          * in this test because the old_ts is zero. That is what we
411          * want as we need another timestamp to compute an interval.
412          */
413         if (interval >= NSEC_PER_SEC) {
414                 irqs->count = 0;
415                 return;
416         }
417 
418         /*
419          * Get the index in the ema table for this interrupt. The
420          * PREDICTION_FACTOR increase the interval size for the array
421          * of exponential average.
422          */
423         index = likely(interval) ?
424                 ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
425 
426         /*
427          * Store the index as an element of the pattern in another
428          * circular array.
429          */
430         irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
431 
432         irqs->ema_time[index] = irq_timings_ema_new(interval,
433                                                     irqs->ema_time[index]);
434 
435         irqs->count++;
436 }
437 
438 /**
439  * irq_timings_next_event - Return when the next event is supposed to arrive
440  *
441  * During the last busy cycle, the number of interrupts is incremented
442  * and stored in the irq_timings structure. This information is
443  * necessary to:
444  *
445  * - know if the index in the table wrapped up:
446  *
447  *      If more than the array size interrupts happened during the
448  *      last busy/idle cycle, the index wrapped up and we have to
449  *      begin with the next element in the array which is the last one
450  *      in the sequence, otherwise it is a the index 0.
451  *
452  * - have an indication of the interrupts activity on this CPU
453  *   (eg. irq/sec)
454  *
455  * The values are 'consumed' after inserting in the statistical model,
456  * thus the count is reinitialized.
457  *
458  * The array of values **must** be browsed in the time direction, the
459  * timestamp must increase between an element and the next one.
460  *
461  * Returns a nanosec time based estimation of the earliest interrupt,
462  * U64_MAX otherwise.
463  */
464 u64 irq_timings_next_event(u64 now)
465 {
466         struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
467         struct irqt_stat *irqs;
468         struct irqt_stat __percpu *s;
469         u64 ts, next_evt = U64_MAX;
470         int i, irq = 0;
471 
472         /*
473          * This function must be called with the local irq disabled in
474          * order to prevent the timings circular buffer to be updated
475          * while we are reading it.
476          */
477         lockdep_assert_irqs_disabled();
478 
479         if (!irqts->count)
480                 return next_evt;
481 
482         /*
483          * Number of elements in the circular buffer: If it happens it
484          * was flushed before, then the number of elements could be
485          * smaller than IRQ_TIMINGS_SIZE, so the count is used,
486          * otherwise the array size is used as we wrapped. The index
487          * begins from zero when we did not wrap. That could be done
488          * in a nicer way with the proper circular array structure
489          * type but with the cost of extra computation in the
490          * interrupt handler hot path. We choose efficiency.
491          *
492          * Inject measured irq/timestamp to the pattern prediction
493          * model while decrementing the counter because we consume the
494          * data from our circular buffer.
495          */
496 
497         i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
498         irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
499 
500         for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
501                 irq = irq_timing_decode(irqts->values[i], &ts);
502                 s = idr_find(&irqt_stats, irq);
503                 if (s)
504                         irq_timings_store(irq, this_cpu_ptr(s), ts);
505         }
506 
507         /*
508          * Look in the list of interrupts' statistics, the earliest
509          * next event.
510          */
511         idr_for_each_entry(&irqt_stats, s, i) {
512 
513                 irqs = this_cpu_ptr(s);
514 
515                 ts = __irq_timings_next_event(irqs, i, now);
516                 if (ts <= now)
517                         return now;
518 
519                 if (ts < next_evt)
520                         next_evt = ts;
521         }
522 
523         return next_evt;
524 }
525 
526 void irq_timings_free(int irq)
527 {
528         struct irqt_stat __percpu *s;
529 
530         s = idr_find(&irqt_stats, irq);
531         if (s) {
532                 free_percpu(s);
533                 idr_remove(&irqt_stats, irq);
534         }
535 }
536 
537 int irq_timings_alloc(int irq)
538 {
539         struct irqt_stat __percpu *s;
540         int id;
541 
542         /*
543          * Some platforms can have the same private interrupt per cpu,
544          * so this function may be be called several times with the
545          * same interrupt number. Just bail out in case the per cpu
546          * stat structure is already allocated.
547          */
548         s = idr_find(&irqt_stats, irq);
549         if (s)
550                 return 0;
551 
552         s = alloc_percpu(*s);
553         if (!s)
554                 return -ENOMEM;
555 
556         idr_preload(GFP_KERNEL);
557         id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
558         idr_preload_end();
559 
560         if (id < 0) {
561                 free_percpu(s);
562                 return id;
563         }
564 
565         return 0;
566 }
567 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp