~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/cpu/perf_event_amd_ibs.c

Version: ~ [ linux-5.14-rc1 ] ~ [ linux-5.13.1 ] ~ [ linux-5.12.16 ] ~ [ linux-5.11.22 ] ~ [ linux-5.10.49 ] ~ [ linux-5.9.16 ] ~ [ linux-5.8.18 ] ~ [ linux-5.7.19 ] ~ [ linux-5.6.19 ] ~ [ linux-5.5.19 ] ~ [ linux-5.4.131 ] ~ [ linux-5.3.18 ] ~ [ linux-5.2.21 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.197 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.239 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.275 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.275 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.18.140 ] ~ [ linux-3.16.85 ] ~ [ linux-3.14.79 ] ~ [ linux-3.12.74 ] ~ [ linux-3.10.108 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * Performance events - AMD IBS
  3  *
  4  *  Copyright (C) 2011 Advanced Micro Devices, Inc., Robert Richter
  5  *
  6  *  For licencing details see kernel-base/COPYING
  7  */
  8 
  9 #include <linux/perf_event.h>
 10 #include <linux/module.h>
 11 #include <linux/pci.h>
 12 #include <linux/ptrace.h>
 13 #include <linux/syscore_ops.h>
 14 
 15 #include <asm/apic.h>
 16 
 17 #include "perf_event.h"
 18 
/* IBS capability flags as read by __get_ibs_caps() (0 = IBS unsupported). */
static u32 ibs_caps;
 20 
 21 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
 22 
 23 #include <linux/kprobes.h>
 24 #include <linux/hardirq.h>
 25 
 26 #include <asm/nmi.h>
 27 
/* Config bits a user may set for each IBS flavor; all others are rejected. */
#define IBS_FETCH_CONFIG_MASK	(IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT)
#define IBS_OP_CONFIG_MASK	IBS_OP_MAX_CNT
 30 
/* Per-cpu IBS state bits, kept in cpu_perf_ibs::state. */
enum ibs_states {
	IBS_ENABLED	= 0,	/* an event is bound to this cpu (perf_ibs_add) */
	IBS_STARTED	= 1,	/* hardware sampling is running */
	IBS_STOPPING	= 2,	/* stopped; late spurious NMIs may still arrive */

	IBS_MAX_STATES,
};
 38 
/* Per-cpu state: the single active IBS event and its ibs_states bits. */
struct cpu_perf_ibs {
	struct perf_event	*event;
	unsigned long		state[BITS_TO_LONGS(IBS_MAX_STATES)];
};
 43 
/*
 * One instance per IBS flavor (fetch, op): the pmu callbacks plus the
 * MSR layout and bit masks that differ between the two flavors.
 */
struct perf_ibs {
	struct pmu			pmu;
	unsigned int			msr;		/* control MSR (IbsFetchCtl / IbsOpCtl) */
	u64				config_mask;	/* user-settable config bits */
	u64				cnt_mask;	/* max-count field within the control MSR */
	u64				enable_mask;	/* enable bit within the control MSR */
	u64				valid_mask;	/* sample-valid bit within the control MSR */
	u64				max_period;	/* largest programmable sample period */
	unsigned long			offset_mask[1];	/* bitmask of implemented MSR offsets */
	int				offset_max;	/* upper bound for the MSR read loop */
	struct cpu_perf_ibs __percpu	*pcpu;		/* per-cpu event state */

	struct attribute		**format_attrs;
	struct attribute_group		format_group;
	const struct attribute_group	*attr_groups[2];

	u64				(*get_count)(u64 config);	/* extract count from config MSR value */
};
 62 
/*
 * Raw sample layout handed to PERF_SAMPLE_RAW consumers: the caps word
 * followed by the sampled IBS MSR values. 'data' aliases 'caps' so the
 * exported u32 buffer starts at the caps field.
 */
struct perf_ibs_data {
	u32		size;
	union {
		u32	data[0];	/* data buffer starts here */
		u32	caps;
	};
	u64		regs[MSR_AMD64_IBS_REG_COUNT_MAX];
};
 71 
 72 static int
 73 perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period)
 74 {
 75         s64 left = local64_read(&hwc->period_left);
 76         s64 period = hwc->sample_period;
 77         int overflow = 0;
 78 
 79         /*
 80          * If we are way outside a reasonable range then just skip forward:
 81          */
 82         if (unlikely(left <= -period)) {
 83                 left = period;
 84                 local64_set(&hwc->period_left, left);
 85                 hwc->last_period = period;
 86                 overflow = 1;
 87         }
 88 
 89         if (unlikely(left < (s64)min)) {
 90                 left += period;
 91                 local64_set(&hwc->period_left, left);
 92                 hwc->last_period = period;
 93                 overflow = 1;
 94         }
 95 
 96         /*
 97          * If the hw period that triggers the sw overflow is too short
 98          * we might hit the irq handler. This biases the results.
 99          * Thus we shorten the next-to-last period and set the last
100          * period to the max period.
101          */
102         if (left > max) {
103                 left -= max;
104                 if (left > max)
105                         left = max;
106                 else if (left < min)
107                         left = min;
108         }
109 
110         *hw_period = (u64)left;
111 
112         return overflow;
113 }
114 
/*
 * Fold a freshly read hardware count into event->count / period_left.
 *
 * Returns 0 when a concurrent NMI changed prev_count under us (the
 * caller must re-read the hardware and retry), 1 when the delta was
 * applied.
 */
static  int
perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width)
{
	struct hw_perf_event *hwc = &event->hw;
	int shift = 64 - width;
	u64 prev_raw_count;
	u64 delta;

	/*
	 * Careful: an NMI might modify the previous event value.
	 *
	 * Our tactic to handle this is to first atomically read and
	 * exchange a new raw count - then add that new-prev delta
	 * count to the generic event atomically:
	 */
	prev_raw_count = local64_read(&hwc->prev_count);
	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
					new_raw_count) != prev_raw_count)
		return 0;

	/*
	 * Now we have the new raw value and have updated the prev
	 * timestamp already. We can now calculate the elapsed delta
	 * (event-)time and add that to the generic event.
	 *
	 * Careful, not all hw sign-extends above the physical width
	 * of the count.
	 */
	delta = (new_raw_count << shift) - (prev_raw_count << shift);
	delta >>= shift;

	local64_add(delta, &event->count);
	local64_sub(delta, &hwc->period_left);

	return 1;
}
151 
152 static struct perf_ibs perf_ibs_fetch;
153 static struct perf_ibs perf_ibs_op;
154 
155 static struct perf_ibs *get_ibs_pmu(int type)
156 {
157         if (perf_ibs_fetch.pmu.type == type)
158                 return &perf_ibs_fetch;
159         if (perf_ibs_op.pmu.type == type)
160                 return &perf_ibs_op;
161         return NULL;
162 }
163 
164 /*
165  * Use IBS for precise event sampling:
166  *
167  *  perf record -a -e cpu-cycles:p ...    # use ibs op counting cycle count
168  *  perf record -a -e r076:p ...          # same as -e cpu-cycles:p
169  *  perf record -a -e r0C1:p ...          # use ibs op counting micro-ops
170  *
171  * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl,
172  * MSRC001_1033) is used to select either cycle or micro-ops counting
173  * mode.
174  *
175  * The rip of IBS samples has skid 0. Thus, IBS supports precise
176  * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the
177  * rip is invalid when IBS was not able to record the rip correctly.
178  * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then.
179  *
180  */
181 static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
182 {
183         switch (event->attr.precise_ip) {
184         case 0:
185                 return -ENOENT;
186         case 1:
187         case 2:
188                 break;
189         default:
190                 return -EOPNOTSUPP;
191         }
192 
193         switch (event->attr.type) {
194         case PERF_TYPE_HARDWARE:
195                 switch (event->attr.config) {
196                 case PERF_COUNT_HW_CPU_CYCLES:
197                         *config = 0;
198                         return 0;
199                 }
200                 break;
201         case PERF_TYPE_RAW:
202                 switch (event->attr.config) {
203                 case 0x0076:
204                         *config = 0;
205                         return 0;
206                 case 0x00C1:
207                         *config = IBS_OP_CNT_CTL;
208                         return 0;
209                 }
210                 break;
211         default:
212                 return -ENOENT;
213         }
214 
215         return -EOPNOTSUPP;
216 }
217 
/*
 * Exclusion bits the IBS hardware cannot honor; perf_ibs_init()
 * rejects any event that sets one of these.
 */
static const struct perf_event_attr ibs_notsupp = {
	.exclude_user	= 1,
	.exclude_kernel	= 1,
	.exclude_hv	= 1,
	.exclude_idle	= 1,
	.exclude_host	= 1,
	.exclude_guest	= 1,
};
226 
/*
 * pmu::event_init: validate the event attributes against the IBS
 * constraints and translate them into the MSR config that will be
 * programmed on start. Returns 0 or -ENOENT/-EINVAL/-EOPNOTSUPP.
 */
static int perf_ibs_init(struct perf_event *event)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs;
	u64 max_cnt, config;
	int ret;

	perf_ibs = get_ibs_pmu(event->attr.type);
	if (perf_ibs) {
		config = event->attr.config;
	} else {
		/* Not an IBS type: try mapping a precise event onto ibs_op. */
		perf_ibs = &perf_ibs_op;
		ret = perf_ibs_precise_event(event, &config);
		if (ret)
			return ret;
	}

	if (event->pmu != &perf_ibs->pmu)
		return -ENOENT;

	/* IBS samples everything; the exclude_* bits cannot be honored. */
	if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
		return -EINVAL;

	if (config & ~perf_ibs->config_mask)
		return -EINVAL;

	if (hwc->sample_period) {
		if (config & perf_ibs->cnt_mask)
			/* raw max_cnt may not be set */
			return -EINVAL;
		if (!event->attr.sample_freq && hwc->sample_period & 0x0f)
			/*
			 * lower 4 bits can not be set in ibs max cnt,
			 * but allowing it in case we adjust the
			 * sample period to set a frequency.
			 */
			return -EINVAL;
		hwc->sample_period &= ~0x0FULL;
		if (!hwc->sample_period)
			hwc->sample_period = 0x10;
	} else {
		/* Period supplied via the config's max-count field instead. */
		max_cnt = config & perf_ibs->cnt_mask;
		config &= ~perf_ibs->cnt_mask;
		event->attr.sample_period = max_cnt << 4;
		hwc->sample_period = event->attr.sample_period;
	}

	if (!hwc->sample_period)
		return -EINVAL;

	/*
	 * If we modify hwc->sample_period, we also need to update
	 * hwc->last_period and hwc->period_left.
	 */
	hwc->last_period = hwc->sample_period;
	local64_set(&hwc->period_left, hwc->sample_period);

	hwc->config_base = perf_ibs->msr;
	hwc->config = config;

	return 0;
}
289 
290 static int perf_ibs_set_period(struct perf_ibs *perf_ibs,
291                                struct hw_perf_event *hwc, u64 *period)
292 {
293         int overflow;
294 
295         /* ignore lower 4 bits in min count: */
296         overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period);
297         local64_set(&hwc->prev_count, 0);
298 
299         return overflow;
300 }
301 
302 static u64 get_ibs_fetch_count(u64 config)
303 {
304         return (config & IBS_FETCH_CNT) >> 12;
305 }
306 
/* Derive the elapsed op count from the IbsOpCtl register value. */
static u64 get_ibs_op_count(u64 config)
{
	u64 count = 0;

	/* Valid bit set means the counter reached max and rolled over. */
	if (config & IBS_OP_VAL)
		count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */

	/*
	 * With RDWROPCNT the current count is readable from the upper
	 * half of the register; add the partial count on top.
	 */
	if (ibs_caps & IBS_CAPS_RDWROPCNT)
		count += (config & IBS_OP_CUR_CNT) >> 32;

	return count;
}
319 
/*
 * Sync the hardware count derived from *config into the perf event,
 * re-reading the control MSR until the update is not disturbed by a
 * concurrent NMI (perf_event_try_update() returning 0).
 */
static void
perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event,
		      u64 *config)
{
	u64 count = perf_ibs->get_count(*config);

	/*
	 * Set width to 64 since we do not overflow on max width but
	 * instead on max count. In perf_ibs_set_period() we clear
	 * prev count manually on overflow.
	 */
	while (!perf_event_try_update(event, count, 64)) {
		rdmsrl(event->hw.config_base, *config);
		count = perf_ibs->get_count(*config);
	}
}
336 
337 static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs,
338                                          struct hw_perf_event *hwc, u64 config)
339 {
340         wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask);
341 }
342 
343 /*
344  * Erratum #420 Instruction-Based Sampling Engine May Generate
345  * Interrupt that Cannot Be Cleared:
346  *
347  * Must clear counter mask first, then clear the enable bit. See
348  * Revision Guide for AMD Family 10h Processors, Publication #41322.
349  */
static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs,
					  struct hw_perf_event *hwc, u64 config)
{
	/* Erratum #420: the count mask must be cleared by a first write... */
	config &= ~perf_ibs->cnt_mask;
	wrmsrl(hwc->config_base, config);
	/* ...before a second write clears the enable bit. Keep this order. */
	config &= ~perf_ibs->enable_mask;
	wrmsrl(hwc->config_base, config);
}
358 
359 /*
360  * We cannot restore the ibs pmu state, so we always needs to update
361  * the event while stopping it and then reset the state when starting
362  * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in
363  * perf_ibs_start()/perf_ibs_stop() and instead always do it.
364  */
/* pmu::start: program a fresh period and enable the hardware. */
static void perf_ibs_start(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 period;

	/* Starting an already-started event is a perf core bug. */
	if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
		return;

	WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE));
	hwc->state = 0;

	perf_ibs_set_period(perf_ibs, hwc, &period);
	/* Mark started before enabling so the NMI handler sees the bit. */
	set_bit(IBS_STARTED, pcpu->state);
	/* The hardware count field drops the low 4 bits of the period. */
	perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);
}
384 
/*
 * pmu::stop: disable the hardware and, unless already up to date,
 * fold the current hardware count into the event.
 */
static void perf_ibs_stop(struct perf_event *event, int flags)
{
	struct hw_perf_event *hwc = &event->hw;
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	u64 config;
	int stopping;

	stopping = test_and_clear_bit(IBS_STARTED, pcpu->state);

	/* Neither running nor pending a count update: nothing to do. */
	if (!stopping && (hwc->state & PERF_HES_UPTODATE))
		return;

	rdmsrl(hwc->config_base, config);

	if (stopping) {
		/* Flag that late NMIs from the disabled hw are expected. */
		set_bit(IBS_STOPPING, pcpu->state);
		perf_ibs_disable_event(perf_ibs, hwc, config);
		WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
		hwc->state |= PERF_HES_STOPPED;
	}

	if (hwc->state & PERF_HES_UPTODATE)
		return;

	/*
	 * Clear valid bit to not count rollovers on update, rollovers
	 * are only updated in the irq handler.
	 */
	config &= ~perf_ibs->valid_mask;

	perf_ibs_event_update(perf_ibs, event, &config);
	hwc->state |= PERF_HES_UPTODATE;
}
419 
/* pmu::add: bind the event to this cpu and optionally start it. */
static int perf_ibs_add(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	/* Only one IBS event per cpu and flavor is supported. */
	if (test_and_set_bit(IBS_ENABLED, pcpu->state))
		return -ENOSPC;

	event->hw.state = PERF_HES_UPTODATE | PERF_HES_STOPPED;

	pcpu->event = event;

	if (flags & PERF_EF_START)
		perf_ibs_start(event, PERF_EF_RELOAD);

	return 0;
}
437 
/* pmu::del: stop the event (updating its count) and unbind it. */
static void perf_ibs_del(struct perf_event *event, int flags)
{
	struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu);
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);

	if (!test_and_clear_bit(IBS_ENABLED, pcpu->state))
		return;

	perf_ibs_stop(event, PERF_EF_UPDATE);

	pcpu->event = NULL;

	perf_event_update_userpage(event);
}
452 
/* pmu::read: no-op; the count is synced on stop and in the NMI handler. */
static void perf_ibs_read(struct perf_event *event) { }
454 
/* sysfs "format" attributes: rand_en for fetch; cnt_ctl for op if supported. */
PMU_FORMAT_ATTR(rand_en,	"config:57");
PMU_FORMAT_ATTR(cnt_ctl,	"config:19");

static struct attribute *ibs_fetch_format_attrs[] = {
	&format_attr_rand_en.attr,
	NULL,
};

static struct attribute *ibs_op_format_attrs[] = {
	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
	NULL,
};
467 
/* The fetch-sampling flavor, driven by the IbsFetchCtl MSR. */
static struct perf_ibs perf_ibs_fetch = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSFETCHCTL,
	.config_mask		= IBS_FETCH_CONFIG_MASK,
	.cnt_mask		= IBS_FETCH_MAX_CNT,
	.enable_mask		= IBS_FETCH_ENABLE,
	.valid_mask		= IBS_FETCH_VAL,
	.max_period		= IBS_FETCH_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
	.format_attrs		= ibs_fetch_format_attrs,

	.get_count		= get_ibs_fetch_count,
};
491 
/* The op-sampling flavor, driven by the IbsOpCtl MSR. */
static struct perf_ibs perf_ibs_op = {
	.pmu = {
		.task_ctx_nr	= perf_invalid_context,

		.event_init	= perf_ibs_init,
		.add		= perf_ibs_add,
		.del		= perf_ibs_del,
		.start		= perf_ibs_start,
		.stop		= perf_ibs_stop,
		.read		= perf_ibs_read,
	},
	.msr			= MSR_AMD64_IBSOPCTL,
	.config_mask		= IBS_OP_CONFIG_MASK,
	.cnt_mask		= IBS_OP_MAX_CNT,
	.enable_mask		= IBS_OP_ENABLE,
	.valid_mask		= IBS_OP_VAL,
	.max_period		= IBS_OP_MAX_CNT << 4,
	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
	.format_attrs		= ibs_op_format_attrs,

	.get_count		= get_ibs_op_count,
};
515 
/*
 * NMI handler for one IBS flavor: collect the sample registers, update
 * the event, emit a perf sample and re-arm (or throttle) the hardware.
 * Returns 1 when the NMI was (possibly spuriously) ours, 0 otherwise.
 */
static int perf_ibs_handle_irq(struct perf_ibs *perf_ibs, struct pt_regs *iregs)
{
	struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu);
	struct perf_event *event = pcpu->event;
	struct hw_perf_event *hwc = &event->hw;
	struct perf_sample_data data;
	struct perf_raw_record raw;
	struct pt_regs regs;
	struct perf_ibs_data ibs_data;
	int offset, size, check_rip, offset_max, throttle = 0;
	unsigned int msr;
	u64 *buf, *config, period;

	if (!test_bit(IBS_STARTED, pcpu->state)) {
		/*
		 * Catch spurious interrupts after stopping IBS: After
		 * disabling IBS there could be still incoming NMIs
		 * with samples that even have the valid bit cleared.
		 * Mark all this NMIs as handled.
		 */
		return test_and_clear_bit(IBS_STOPPING, pcpu->state) ? 1 : 0;
	}

	msr = hwc->config_base;
	buf = ibs_data.regs;
	rdmsrl(msr, *buf);
	/* No sample latched in the control MSR: the NMI was not ours. */
	if (!(*buf++ & perf_ibs->valid_mask))
		return 0;

	config = &ibs_data.regs[0];
	perf_ibs_event_update(perf_ibs, event, config);
	perf_sample_data_init(&data, 0, hwc->last_period);
	if (!perf_ibs_set_period(perf_ibs, hwc, &period))
		goto out;	/* no sw counter overflow */

	ibs_data.caps = ibs_caps;
	size = 1;
	offset = 1;
	check_rip = (perf_ibs == &perf_ibs_op && (ibs_caps & IBS_CAPS_RIPINVALIDCHK));
	/* Read only as many sample MSRs as the consumers need. */
	if (event->attr.sample_type & PERF_SAMPLE_RAW)
		offset_max = perf_ibs->offset_max;
	else if (check_rip)
		offset_max = 2;
	else
		offset_max = 1;
	do {
		rdmsrl(msr + offset, *buf++);
		size++;
		offset = find_next_bit(perf_ibs->offset_mask,
				       perf_ibs->offset_max,
				       offset + 1);
	} while (offset < offset_max);
	ibs_data.size = sizeof(u64) * size;

	regs = *iregs;
	if (check_rip && (ibs_data.regs[2] & IBS_RIP_INVALID)) {
		/* Hardware could not record the rip; fall back to pt_regs. */
		regs.flags &= ~PERF_EFLAGS_EXACT;
	} else {
		/* regs[1] holds the sampled linear address (IbsOpRip for op). */
		set_linear_ip(&regs, ibs_data.regs[1]);
		regs.flags |= PERF_EFLAGS_EXACT;
	}

	if (event->attr.sample_type & PERF_SAMPLE_RAW) {
		raw.size = sizeof(u32) + ibs_data.size;
		raw.data = ibs_data.data;
		data.raw = &raw;
	}

	throttle = perf_event_overflow(event, &data, &regs);
out:
	/* Re-arm with the freshly computed period, or keep it off when throttled. */
	if (throttle)
		perf_ibs_disable_event(perf_ibs, hwc, *config);
	else
		perf_ibs_enable_event(perf_ibs, hwc, period >> 4);

	perf_event_update_userpage(event);

	return 1;
}
595 
596 static int
597 perf_ibs_nmi_handler(unsigned int cmd, struct pt_regs *regs)
598 {
599         int handled = 0;
600 
601         handled += perf_ibs_handle_irq(&perf_ibs_fetch, regs);
602         handled += perf_ibs_handle_irq(&perf_ibs_op, regs);
603 
604         if (handled)
605                 inc_irq_stat(apic_perf_irqs);
606 
607         return handled;
608 }
609 NOKPROBE_SYMBOL(perf_ibs_nmi_handler);
610 
/*
 * Allocate the per-cpu state and register one IBS pmu; on registration
 * failure the per-cpu memory is released again. Returns 0 or -errno.
 */
static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
{
	struct cpu_perf_ibs __percpu *pcpu;
	int ret;

	pcpu = alloc_percpu(struct cpu_perf_ibs);
	if (!pcpu)
		return -ENOMEM;

	perf_ibs->pcpu = pcpu;

	/* register attributes */
	if (perf_ibs->format_attrs[0]) {
		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
		perf_ibs->format_group.name	= "format";
		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;

		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
	}

	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
	if (ret) {
		perf_ibs->pcpu = NULL;
		free_percpu(pcpu);
	}

	return ret;
}
641 
642 static __init int perf_event_ibs_init(void)
643 {
644         struct attribute **attr = ibs_op_format_attrs;
645 
646         if (!ibs_caps)
647                 return -ENODEV; /* ibs not supported by the cpu */
648 
649         perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
650 
651         if (ibs_caps & IBS_CAPS_OPCNT) {
652                 perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
653                 *attr++ = &format_attr_cnt_ctl.attr;
654         }
655         perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
656 
657         register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
658         printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
659 
660         return 0;
661 }
662 
663 #else /* defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) */
664 
/* IBS perf support compiled out: nothing to register, report success. */
static __init int perf_event_ibs_init(void) { return 0; }
666 
667 #endif
668 
669 /* IBS - apic initialization, for perf and oprofile */
670 
671 static __init u32 __get_ibs_caps(void)
672 {
673         u32 caps;
674         unsigned int max_level;
675 
676         if (!boot_cpu_has(X86_FEATURE_IBS))
677                 return 0;
678 
679         /* check IBS cpuid feature flags */
680         max_level = cpuid_eax(0x80000000);
681         if (max_level < IBS_CPUID_FEATURES)
682                 return IBS_CAPS_DEFAULT;
683 
684         caps = cpuid_eax(IBS_CPUID_FEATURES);
685         if (!(caps & IBS_CAPS_AVAIL))
686                 /* cpuid flags not valid */
687                 return IBS_CAPS_DEFAULT;
688 
689         return caps;
690 }
691 
/* Return the cached IBS capability flags (0 when IBS is unavailable). */
u32 get_ibs_caps(void)
{
	return ibs_caps;
}

EXPORT_SYMBOL(get_ibs_caps);
698 
/* Try to reserve the given APIC EILVT offset for NMI delivery; 1 on success. */
static inline int get_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 1);
}
703 
/* Release a previously reserved APIC EILVT offset; 1 on success. */
static inline int put_eilvt(int offset)
{
	return !setup_APIC_eilvt(offset, 0, 0, 1);
}
708 
709 /*
710  * Check and reserve APIC extended interrupt LVT offset for IBS if available.
711  */
static inline int ibs_eilvt_valid(void)
{
	int offset;
	u64 val;
	int valid = 0;

	/* The MSR and EILVT reservation are per-cpu; stay on this one. */
	preempt_disable();

	rdmsrl(MSR_AMD64_IBSCTL, val);
	offset = val & IBSCTL_LVT_OFFSET_MASK;

	if (!(val & IBSCTL_LVT_OFFSET_VALID)) {
		pr_err(FW_BUG "cpu %d, invalid IBS interrupt offset %d (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	/* Reserve the offset so nothing else grabs it. */
	if (!get_eilvt(offset)) {
		pr_err(FW_BUG "cpu %d, IBS interrupt offset %d not available (MSR%08X=0x%016llx)\n",
		       smp_processor_id(), offset, MSR_AMD64_IBSCTL, val);
		goto out;
	}

	valid = 1;
out:
	preempt_enable();

	return valid;
}
741 
/*
 * Program the IBS LVT offset into the IBSCTL register of every node's
 * northbridge via PCI config space and verify each write.
 */
static int setup_ibs_ctl(int ibs_eilvt_off)
{
	struct pci_dev *cpu_cfg;
	int nodes;
	u32 value = 0;

	nodes = 0;
	cpu_cfg = NULL;
	do {
		/* pci_get_device() drops the ref on the previous device. */
		cpu_cfg = pci_get_device(PCI_VENDOR_ID_AMD,
					 PCI_DEVICE_ID_AMD_10H_NB_MISC,
					 cpu_cfg);
		if (!cpu_cfg)
			break;
		++nodes;
		pci_write_config_dword(cpu_cfg, IBSCTL, ibs_eilvt_off
				       | IBSCTL_LVT_OFFSET_VALID);
		/* Read back to catch a northbridge rejecting the offset. */
		pci_read_config_dword(cpu_cfg, IBSCTL, &value);
		if (value != (ibs_eilvt_off | IBSCTL_LVT_OFFSET_VALID)) {
			pci_dev_put(cpu_cfg);
			printk(KERN_DEBUG "Failed to setup IBS LVT offset, "
			       "IBSCTL = 0x%08x\n", value);
			return -EINVAL;
		}
	} while (1);

	if (!nodes) {
		printk(KERN_DEBUG "No CPU node configured for IBS\n");
		return -ENODEV;
	}

	return 0;
}
775 
776 /*
777  * This runs only on the current cpu. We try to find an LVT offset and
778  * setup the local APIC. For this we must disable preemption. On
779  * success we initialize all nodes with this offset. This updates then
780  * the offset in the IBS_CTL per-node msr. The per-core APIC setup of
781  * the IBS interrupt vector is handled by perf_ibs_cpu_notifier that
782  * is using the new offset.
783  */
static int force_ibs_eilvt_setup(void)
{
	int offset;
	int ret;

	preempt_disable();
	/* find the next free available EILVT entry, skip offset 0 */
	for (offset = 1; offset < APIC_EILVT_NR_MAX; offset++) {
		if (get_eilvt(offset))
			break;
	}
	preempt_enable();

	if (offset == APIC_EILVT_NR_MAX) {
		printk(KERN_DEBUG "No EILVT entry available\n");
		return -EBUSY;
	}

	/* Propagate the offset to the IBSCTL register of every node. */
	ret = setup_ibs_ctl(offset);
	if (ret)
		goto out;

	/* Re-check that the offset we programmed is actually usable. */
	if (!ibs_eilvt_valid()) {
		ret = -EFAULT;
		goto out;
	}

	pr_info("IBS: LVT offset %d assigned\n", offset);

	return 0;
out:
	/* Undo the EILVT reservation taken above. */
	preempt_disable();
	put_eilvt(offset);
	preempt_enable();
	return ret;
}
820 
/* One-time LVT offset assignment, needed on family 10h only. */
static void ibs_eilvt_setup(void)
{
	/*
	 * Force LVT offset assignment for family 10h: The offsets are
	 * not assigned by the BIOS for this family, so the OS is
	 * responsible for doing it. If the OS assignment fails, fall
	 * back to BIOS settings and try to setup this.
	 */
	if (boot_cpu_data.x86 == 0x10)
		force_ibs_eilvt_setup();
}
832 
833 static inline int get_ibs_lvt_offset(void)
834 {
835         u64 val;
836 
837         rdmsrl(MSR_AMD64_IBSCTL, val);
838         if (!(val & IBSCTL_LVT_OFFSET_VALID))
839                 return -EINVAL;
840 
841         return val & IBSCTL_LVT_OFFSET_MASK;
842 }
843 
844 static void setup_APIC_ibs(void *dummy)
845 {
846         int offset;
847 
848         offset = get_ibs_lvt_offset();
849         if (offset < 0)
850                 goto failed;
851 
852         if (!setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_NMI, 0))
853                 return;
854 failed:
855         pr_warn("perf: IBS APIC setup failed on cpu #%d\n",
856                 smp_processor_id());
857 }
858 
859 static void clear_APIC_ibs(void *dummy)
860 {
861         int offset;
862 
863         offset = get_ibs_lvt_offset();
864         if (offset >= 0)
865                 setup_APIC_eilvt(offset, 0, APIC_EILVT_MSG_FIX, 1);
866 }
867 
868 #ifdef CONFIG_PM
869 
/* Syscore suspend hook: mask the IBS LVT entry on the boot cpu. */
static int perf_ibs_suspend(void)
{
	clear_APIC_ibs(NULL);
	return 0;
}
875 
/* Syscore resume hook: redo LVT assignment and re-program the APIC entry. */
static void perf_ibs_resume(void)
{
	ibs_eilvt_setup();
	setup_APIC_ibs(NULL);
}
881 
/* Suspend/resume callbacks for the IBS APIC state. */
static struct syscore_ops perf_ibs_syscore_ops = {
	.resume		= perf_ibs_resume,
	.suspend	= perf_ibs_suspend,
};

/* Hook the IBS APIC state into the system suspend/resume path. */
static void perf_ibs_pm_init(void)
{
	register_syscore_ops(&perf_ibs_syscore_ops);
}
891 
892 #else
893 
/* !CONFIG_PM: no suspend/resume hooks needed. */
static inline void perf_ibs_pm_init(void) { }
895 
896 #endif
897 
898 static int
899 perf_ibs_cpu_notifier(struct notifier_block *self, unsigned long action, void *hcpu)
900 {
901         switch (action & ~CPU_TASKS_FROZEN) {
902         case CPU_STARTING:
903                 setup_APIC_ibs(NULL);
904                 break;
905         case CPU_DYING:
906                 clear_APIC_ibs(NULL);
907                 break;
908         default:
909                 break;
910         }
911 
912         return NOTIFY_OK;
913 }
914 
/*
 * Module entry point: probe IBS capabilities, set up the LVT offset on
 * all nodes and cpus, then register the perf pmus and the NMI handler.
 */
static __init int amd_ibs_init(void)
{
	u32 caps;
	int ret = -EINVAL;

	caps = __get_ibs_caps();
	if (!caps)
		return -ENODEV; /* ibs not supported by the cpu */

	ibs_eilvt_setup();

	if (!ibs_eilvt_valid())
		goto out;

	perf_ibs_pm_init();
	cpu_notifier_register_begin();
	ibs_caps = caps;
	/* make ibs_caps visible to other cpus: */
	smp_mb();
	/* Program the APIC entry on all currently online cpus. */
	smp_call_function(setup_APIC_ibs, NULL, 1);
	/* Future cpus are handled via the hotplug notifier. */
	__perf_cpu_notifier(perf_ibs_cpu_notifier);
	cpu_notifier_register_done();

	ret = perf_event_ibs_init();
out:
	if (ret)
		pr_err("Failed to setup IBS, %d\n", ret);
	return ret;
}
944 
945 /* Since we need the pci subsystem to init ibs we can't do this earlier: */
946 device_initcall(amd_ibs_init);
947 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp