TOMOYO Linux Cross Reference
Linux/arch/x86/kernel/cpu/perf_event_intel_lbr.c

  1 #include <linux/perf_event.h>
  2 #include <linux/types.h>
  3 
  4 #include <asm/perf_event.h>
  5 #include <asm/msr.h>
  6 #include <asm/insn.h>
  7 
  8 #include "perf_event.h"
  9 
 10 enum {
 11         LBR_FORMAT_32           = 0x00,
 12         LBR_FORMAT_LIP          = 0x01,
 13         LBR_FORMAT_EIP          = 0x02,
 14         LBR_FORMAT_EIP_FLAGS    = 0x03,
 15         LBR_FORMAT_EIP_FLAGS2   = 0x04,
 16         LBR_FORMAT_MAX_KNOWN    = LBR_FORMAT_EIP_FLAGS2,
 17 };
 18 
 19 static enum {
 20         LBR_EIP_FLAGS           = 1,
 21         LBR_TSX                 = 2,
 22 } lbr_desc[LBR_FORMAT_MAX_KNOWN + 1] = {
 23         [LBR_FORMAT_EIP_FLAGS]  = LBR_EIP_FLAGS,
 24         [LBR_FORMAT_EIP_FLAGS2] = LBR_EIP_FLAGS | LBR_TSX,
 25 };
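    /*
     * lbr_desc[] records which flag bits each known LBR format packs into
     * the top of the FROM value: the EIP_FLAGS formats carry the mispredict
     * flag, and EIP_FLAGS2 additionally carries the TSX in-transaction and
     * abort flags (see the LBR_FROM_FLAG_* bits below). It is consumed by
     * intel_pmu_lbr_read_64() to decide what to decode and strip.
     */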
 26 
 27 /*
 28  * Intel LBR_SELECT bits
 29  * Intel Vol3a, April 2011, Section 16.7 Table 16-10
 30  *
 31  * Hardware branch filter (not available on all CPUs)
 32  */
 33 #define LBR_KERNEL_BIT          0 /* do not capture at ring0 */
 34 #define LBR_USER_BIT            1 /* do not capture at ring > 0 */
 35 #define LBR_JCC_BIT             2 /* do not capture conditional branches */
 36 #define LBR_REL_CALL_BIT        3 /* do not capture relative calls */
 37 #define LBR_IND_CALL_BIT        4 /* do not capture indirect calls */
 38 #define LBR_RETURN_BIT          5 /* do not capture near returns */
 39 #define LBR_IND_JMP_BIT         6 /* do not capture indirect jumps */
 40 #define LBR_REL_JMP_BIT         7 /* do not capture relative jumps */
 41 #define LBR_FAR_BIT             8 /* do not capture far branches */
 42 
 43 #define LBR_KERNEL      (1 << LBR_KERNEL_BIT)
 44 #define LBR_USER        (1 << LBR_USER_BIT)
 45 #define LBR_JCC         (1 << LBR_JCC_BIT)
 46 #define LBR_REL_CALL    (1 << LBR_REL_CALL_BIT)
 47 #define LBR_IND_CALL    (1 << LBR_IND_CALL_BIT)
 48 #define LBR_RETURN      (1 << LBR_RETURN_BIT)
 49 #define LBR_REL_JMP     (1 << LBR_REL_JMP_BIT)
 50 #define LBR_IND_JMP     (1 << LBR_IND_JMP_BIT)
 51 #define LBR_FAR         (1 << LBR_FAR_BIT)
 52 
 53 #define LBR_PLM (LBR_KERNEL | LBR_USER)
 54 
 55 #define LBR_SEL_MASK    0x1ff   /* valid bits in LBR_SELECT */
 56 #define LBR_NOT_SUPP    -1      /* LBR filter not supported */
 57 #define LBR_IGN         0       /* ignored */
 58 
 59 #define LBR_ANY          \
 60         (LBR_JCC        |\
 61          LBR_REL_CALL   |\
 62          LBR_IND_CALL   |\
 63          LBR_RETURN     |\
 64          LBR_REL_JMP    |\
 65          LBR_IND_JMP    |\
 66          LBR_FAR)
 67 
 68 #define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
 69 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 70 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 71 
 72 #define for_each_branch_sample_type(x) \
 73         for ((x) = PERF_SAMPLE_BRANCH_USER; \
 74              (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
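    /*
     * Iterates over every single-bit perf branch_sample_type flag, starting
     * at PERF_SAMPLE_BRANCH_USER; used by intel_pmu_setup_hw_lbr_filter()
     * to translate the user's request into LBR_SELECT bits.
     */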
 75 
 76 /*
 77  * x86 control flow change classification
 78  * x86 control flow changes include branches, interrupts, traps, faults
 79  */
 80 enum {
 81         X86_BR_NONE     = 0,      /* unknown */
 82 
 83         X86_BR_USER     = 1 << 0, /* branch target is user */
 84         X86_BR_KERNEL   = 1 << 1, /* branch target is kernel */
 85 
 86         X86_BR_CALL     = 1 << 2, /* call */
 87         X86_BR_RET      = 1 << 3, /* return */
 88         X86_BR_SYSCALL  = 1 << 4, /* syscall */
 89         X86_BR_SYSRET   = 1 << 5, /* syscall return */
 90         X86_BR_INT      = 1 << 6, /* sw interrupt */
 91         X86_BR_IRET     = 1 << 7, /* return from interrupt */
 92         X86_BR_JCC      = 1 << 8, /* conditional */
 93         X86_BR_JMP      = 1 << 9, /* jump */
 94         X86_BR_IRQ      = 1 << 10,/* hw interrupt or trap or fault */
 95         X86_BR_IND_CALL = 1 << 11,/* indirect calls */
 96         X86_BR_ABORT    = 1 << 12,/* transaction abort */
 97         X86_BR_IN_TX    = 1 << 13,/* in transaction */
 98         X86_BR_NO_TX    = 1 << 14,/* not in transaction */
 99 };
100 
101 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
102 #define X86_BR_ANYTX (X86_BR_NO_TX | X86_BR_IN_TX)
103 
104 #define X86_BR_ANY       \
105         (X86_BR_CALL    |\
106          X86_BR_RET     |\
107          X86_BR_SYSCALL |\
108          X86_BR_SYSRET  |\
109          X86_BR_INT     |\
110          X86_BR_IRET    |\
111          X86_BR_JCC     |\
112          X86_BR_JMP      |\
113          X86_BR_IRQ      |\
114          X86_BR_ABORT    |\
115          X86_BR_IND_CALL)
116 
117 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
118 
119 #define X86_BR_ANY_CALL          \
120         (X86_BR_CALL            |\
121          X86_BR_IND_CALL        |\
122          X86_BR_SYSCALL         |\
123          X86_BR_IRQ             |\
124          X86_BR_INT)
125 
126 static void intel_pmu_lbr_filter(struct cpu_hw_events *cpuc);
127 
128 /*
 129  * We only support LBR implementations that have FREEZE_LBRS_ON_PMI;
 130  * otherwise it becomes nearly impossible to get a reliable stack.
131  */
132 
133 static void __intel_pmu_lbr_enable(void)
134 {
135         u64 debugctl;
136         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
137 
138         if (cpuc->lbr_sel)
139                 wrmsrl(MSR_LBR_SELECT, cpuc->lbr_sel->config);
140 
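            /*
             * DEBUGCTLMSR_LBR turns branch recording on; FREEZE_LBRS_ON_PMI
             * makes the CPU stop updating the LBR stack when a counter
             * overflows, so the PMI handler's own branches do not overwrite
             * the entries we are about to read.
             */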
141         rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
142         debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
143         wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
144 }
145 
146 static void __intel_pmu_lbr_disable(void)
147 {
148         u64 debugctl;
149 
150         rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
151         debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
152         wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
153 }
154 
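    /*
     * In LBR_FORMAT_32 each entry is a single MSR holding both addresses,
     * so clearing the FROM bank is enough; the 64-bit formats have separate
     * FROM and TO MSRs and both banks are cleared.
     */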
155 static void intel_pmu_lbr_reset_32(void)
156 {
157         int i;
158 
159         for (i = 0; i < x86_pmu.lbr_nr; i++)
160                 wrmsrl(x86_pmu.lbr_from + i, 0);
161 }
162 
163 static void intel_pmu_lbr_reset_64(void)
164 {
165         int i;
166 
167         for (i = 0; i < x86_pmu.lbr_nr; i++) {
168                 wrmsrl(x86_pmu.lbr_from + i, 0);
169                 wrmsrl(x86_pmu.lbr_to   + i, 0);
170         }
171 }
172 
173 void intel_pmu_lbr_reset(void)
174 {
175         if (!x86_pmu.lbr_nr)
176                 return;
177 
178         if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
179                 intel_pmu_lbr_reset_32();
180         else
181                 intel_pmu_lbr_reset_64();
182 }
183 
184 void intel_pmu_lbr_enable(struct perf_event *event)
185 {
186         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
187 
188         if (!x86_pmu.lbr_nr)
189                 return;
190 
191         /*
192          * Reset the LBR stack if we changed task context to
193          * avoid data leaks.
194          */
195         if (event->ctx->task && cpuc->lbr_context != event->ctx) {
196                 intel_pmu_lbr_reset();
197                 cpuc->lbr_context = event->ctx;
198         }
199         cpuc->br_sel = event->hw.branch_reg.reg;
200 
201         cpuc->lbr_users++;
202 }
203 
204 void intel_pmu_lbr_disable(struct perf_event *event)
205 {
206         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
207 
208         if (!x86_pmu.lbr_nr)
209                 return;
210 
211         cpuc->lbr_users--;
212         WARN_ON_ONCE(cpuc->lbr_users < 0);
213 
214         if (cpuc->enabled && !cpuc->lbr_users) {
215                 __intel_pmu_lbr_disable();
216                 /* avoid stale pointer */
217                 cpuc->lbr_context = NULL;
218         }
219 }
220 
221 void intel_pmu_lbr_enable_all(void)
222 {
223         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
224 
225         if (cpuc->lbr_users)
226                 __intel_pmu_lbr_enable();
227 }
228 
229 void intel_pmu_lbr_disable_all(void)
230 {
231         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
232 
233         if (cpuc->lbr_users)
234                 __intel_pmu_lbr_disable();
235 }
236 
237 /*
238  * TOS = most recently recorded branch
239  */
240 static inline u64 intel_pmu_lbr_tos(void)
241 {
242         u64 tos;
243 
244         rdmsrl(x86_pmu.lbr_tos, tos);
245 
246         return tos;
247 }
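    /*
     * The read routines below walk backwards from the TOS index, wrapping
     * with (tos - i) & (lbr_nr - 1); this relies on lbr_nr being a power of
     * two (4, 8 or 16 on the CPUs initialized at the bottom of this file).
     */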
248 
249 static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
250 {
251         unsigned long mask = x86_pmu.lbr_nr - 1;
252         u64 tos = intel_pmu_lbr_tos();
253         int i;
254 
255         for (i = 0; i < x86_pmu.lbr_nr; i++) {
256                 unsigned long lbr_idx = (tos - i) & mask;
257                 union {
258                         struct {
259                                 u32 from;
260                                 u32 to;
261                         };
262                         u64     lbr;
263                 } msr_lastbranch;
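                    /*
                     * In this format a single 64-bit MSR packs both
                     * addresses; on little-endian x86 the low half is the
                     * branch source and the high half the target, hence
                     * the from/to overlay above.
                     */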
264 
265                 rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
266 
267                 cpuc->lbr_entries[i].from       = msr_lastbranch.from;
268                 cpuc->lbr_entries[i].to         = msr_lastbranch.to;
269                 cpuc->lbr_entries[i].mispred    = 0;
270                 cpuc->lbr_entries[i].predicted  = 0;
271                 cpuc->lbr_entries[i].in_tx      = 0;
272                 cpuc->lbr_entries[i].abort      = 0;
273                 cpuc->lbr_entries[i].reserved   = 0;
274         }
275         cpuc->lbr_stack.nr = i;
276 }
277 
278 /*
 279  * Due to lack of segmentation in Linux, the effective address (offset)
280  * is the same as the linear address, allowing us to merge the LIP and EIP
281  * LBR formats.
282  */
283 static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
284 {
285         unsigned long mask = x86_pmu.lbr_nr - 1;
286         int lbr_format = x86_pmu.intel_cap.lbr_format;
287         u64 tos = intel_pmu_lbr_tos();
288         int i;
289 
290         for (i = 0; i < x86_pmu.lbr_nr; i++) {
291                 unsigned long lbr_idx = (tos - i) & mask;
292                 u64 from, to, mis = 0, pred = 0, in_tx = 0, abort = 0;
293                 int skip = 0;
294                 int lbr_flags = lbr_desc[lbr_format];
295 
296                 rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
297                 rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
298 
299                 if (lbr_flags & LBR_EIP_FLAGS) {
300                         mis = !!(from & LBR_FROM_FLAG_MISPRED);
301                         pred = !mis;
302                         skip = 1;
303                 }
304                 if (lbr_flags & LBR_TSX) {
305                         in_tx = !!(from & LBR_FROM_FLAG_IN_TX);
306                         abort = !!(from & LBR_FROM_FLAG_ABORT);
307                         skip = 3;
308                 }
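                    /*
                     * The flag bits above live in the top 'skip' bits of
                     * the FROM value; shifting left and back with an
                     * arithmetic right shift strips them while
                     * sign-extending the remaining canonical address.
                     */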
309                 from = (u64)((((s64)from) << skip) >> skip);
310 
311                 cpuc->lbr_entries[i].from       = from;
312                 cpuc->lbr_entries[i].to         = to;
313                 cpuc->lbr_entries[i].mispred    = mis;
314                 cpuc->lbr_entries[i].predicted  = pred;
315                 cpuc->lbr_entries[i].in_tx      = in_tx;
316                 cpuc->lbr_entries[i].abort      = abort;
317                 cpuc->lbr_entries[i].reserved   = 0;
318         }
319         cpuc->lbr_stack.nr = i;
320 }
321 
322 void intel_pmu_lbr_read(void)
323 {
324         struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
325 
326         if (!cpuc->lbr_users)
327                 return;
328 
329         if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32)
330                 intel_pmu_lbr_read_32(cpuc);
331         else
332                 intel_pmu_lbr_read_64(cpuc);
333 
334         intel_pmu_lbr_filter(cpuc);
335 }
336 
337 /*
338  * SW filter is used:
339  * - in case there is no HW filter
340  * - in case the HW filter has errata or limitations
341  */
342 static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
343 {
344         u64 br_type = event->attr.branch_sample_type;
345         int mask = 0;
346 
347         if (br_type & PERF_SAMPLE_BRANCH_USER)
348                 mask |= X86_BR_USER;
349 
350         if (br_type & PERF_SAMPLE_BRANCH_KERNEL)
351                 mask |= X86_BR_KERNEL;
352 
353         /* we ignore BRANCH_HV here */
354 
355         if (br_type & PERF_SAMPLE_BRANCH_ANY)
356                 mask |= X86_BR_ANY;
357 
358         if (br_type & PERF_SAMPLE_BRANCH_ANY_CALL)
359                 mask |= X86_BR_ANY_CALL;
360 
361         if (br_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
362                 mask |= X86_BR_RET | X86_BR_IRET | X86_BR_SYSRET;
363 
364         if (br_type & PERF_SAMPLE_BRANCH_IND_CALL)
365                 mask |= X86_BR_IND_CALL;
366 
367         if (br_type & PERF_SAMPLE_BRANCH_ABORT_TX)
368                 mask |= X86_BR_ABORT;
369 
370         if (br_type & PERF_SAMPLE_BRANCH_IN_TX)
371                 mask |= X86_BR_IN_TX;
372 
373         if (br_type & PERF_SAMPLE_BRANCH_NO_TX)
374                 mask |= X86_BR_NO_TX;
375 
376         /*
 377          * stash the actual user request into reg; it may
 378          * be used by fixup code for some CPUs
379          */
380         event->hw.branch_reg.reg = mask;
381 }
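    /*
     * The X86_BR_* mask stashed above becomes cpuc->br_sel when the event
     * is enabled; intel_pmu_lbr_filter() checks every captured branch
     * against it after each LBR read.
     */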
382 
383 /*
384  * setup the HW LBR filter
 385  * Used only when available; it may not be enough to disambiguate
 386  * all branches and may need the help of the SW filter
387  */
388 static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
389 {
390         struct hw_perf_event_extra *reg;
391         u64 br_type = event->attr.branch_sample_type;
392         u64 mask = 0, m;
393         u64 v;
394 
395         for_each_branch_sample_type(m) {
396                 if (!(br_type & m))
397                         continue;
398 
399                 v = x86_pmu.lbr_sel_map[m];
400                 if (v == LBR_NOT_SUPP)
401                         return -EOPNOTSUPP;
402 
403                 if (v != LBR_IGN)
404                         mask |= v;
405         }
406         reg = &event->hw.branch_reg;
407         reg->idx = EXTRA_REG_LBR;
408 
409         /* LBR_SELECT operates in suppress mode so invert mask */
410         reg->config = ~mask & x86_pmu.lbr_sel_mask;
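            /*
             * Worked example: a request for user-level branches of any type
             * maps to LBR_USER | LBR_ANY; inverted against LBR_SEL_MASK that
             * leaves only LBR_KERNEL set, i.e. "suppress ring-0 branches".
             */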
411 
412         return 0;
413 }
414 
415 int intel_pmu_setup_lbr_filter(struct perf_event *event)
416 {
417         int ret = 0;
418 
419         /*
420          * no LBR on this PMU
421          */
422         if (!x86_pmu.lbr_nr)
423                 return -EOPNOTSUPP;
424 
425         /*
426          * setup SW LBR filter
427          */
428         intel_pmu_setup_sw_lbr_filter(event);
429 
430         /*
431          * setup HW LBR filter, if any
432          */
433         if (x86_pmu.lbr_sel_map)
434                 ret = intel_pmu_setup_hw_lbr_filter(event);
435 
436         return ret;
437 }
438 
439 /*
 440  * return the type of control flow change at address "from".
 441  * The instruction is not necessarily a branch (in case of interrupt).
442  *
443  * The branch type returned also includes the priv level of the
444  * target of the control flow change (X86_BR_USER, X86_BR_KERNEL).
445  *
446  * If a branch type is unknown OR the instruction cannot be
447  * decoded (e.g., text page not present), then X86_BR_NONE is
448  * returned.
449  */
450 static int branch_type(unsigned long from, unsigned long to, int abort)
451 {
452         struct insn insn;
453         void *addr;
454         int bytes, size = MAX_INSN_SIZE;
455         int ret = X86_BR_NONE;
456         int ext, to_plm, from_plm;
457         u8 buf[MAX_INSN_SIZE];
458         int is64 = 0;
459 
460         to_plm = kernel_ip(to) ? X86_BR_KERNEL : X86_BR_USER;
461         from_plm = kernel_ip(from) ? X86_BR_KERNEL : X86_BR_USER;
462 
463         /*
 464          * may be zero if the LBR did not fill up after a reset by the time
465          * we get a PMU interrupt
466          */
467         if (from == 0 || to == 0)
468                 return X86_BR_NONE;
469 
470         if (abort)
471                 return X86_BR_ABORT | to_plm;
472 
473         if (from_plm == X86_BR_USER) {
474                 /*
475                  * can happen if measuring at the user level only
476                  * and we interrupt in a kernel thread, e.g., idle.
477                  */
478                 if (!current->mm)
479                         return X86_BR_NONE;
480 
481                 /* may fail if text not present */
482                 bytes = copy_from_user_nmi(buf, (void __user *)from, size);
483                 if (bytes != size)
484                         return X86_BR_NONE;
485 
486                 addr = buf;
487         } else {
488                 /*
489                  * The LBR logs any address in the IP, even if the IP just
490                  * faulted. This means userspace can control the from address.
 491                  * Ensure we don't blindly read any address by validating it is
492                  * a known text address.
493                  */
494                 if (kernel_text_address(from))
495                         addr = (void *)from;
496                 else
497                         return X86_BR_NONE;
498         }
499 
500         /*
501          * decoder needs to know the ABI especially
502          * on 64-bit systems running 32-bit apps
503          */
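            /*
             * Kernel addresses imply 64-bit code; user code is treated as
             * 64-bit unless the task runs in IA-32 compat mode (TIF_IA32).
             * On 32-bit kernels everything is decoded as 32-bit.
             */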
504 #ifdef CONFIG_X86_64
505         is64 = kernel_ip((unsigned long)addr) || !test_thread_flag(TIF_IA32);
506 #endif
507         insn_init(&insn, addr, is64);
508         insn_get_opcode(&insn);
509 
510         switch (insn.opcode.bytes[0]) {
511         case 0xf:
512                 switch (insn.opcode.bytes[1]) {
513                 case 0x05: /* syscall */
514                 case 0x34: /* sysenter */
515                         ret = X86_BR_SYSCALL;
516                         break;
517                 case 0x07: /* sysret */
518                 case 0x35: /* sysexit */
519                         ret = X86_BR_SYSRET;
520                         break;
521                 case 0x80 ... 0x8f: /* conditional */
522                         ret = X86_BR_JCC;
523                         break;
524                 default:
525                         ret = X86_BR_NONE;
526                 }
527                 break;
528         case 0x70 ... 0x7f: /* conditional */
529                 ret = X86_BR_JCC;
530                 break;
531         case 0xc2: /* near ret */
532         case 0xc3: /* near ret */
533         case 0xca: /* far ret */
534         case 0xcb: /* far ret */
535                 ret = X86_BR_RET;
536                 break;
537         case 0xcf: /* iret */
538                 ret = X86_BR_IRET;
539                 break;
540         case 0xcc ... 0xce: /* int */
541                 ret = X86_BR_INT;
542                 break;
543         case 0xe8: /* call near rel */
544         case 0x9a: /* call far absolute */
545                 ret = X86_BR_CALL;
546                 break;
547         case 0xe0 ... 0xe3: /* loop jmp */
548                 ret = X86_BR_JCC;
549                 break;
550         case 0xe9 ... 0xeb: /* jmp */
551                 ret = X86_BR_JMP;
552                 break;
553         case 0xff: /* call near absolute, call far absolute ind */
554                 insn_get_modrm(&insn);
555                 ext = (insn.modrm.bytes[0] >> 3) & 0x7;
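                    /*
                     * ext is the ModRM reg/opcode-extension field (bits
                     * 5:3): /2 and /3 are indirect calls, /4 and /5 are
                     * indirect jumps.
                     */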
556                 switch (ext) {
557                 case 2: /* near ind call */
558                 case 3: /* far ind call */
559                         ret = X86_BR_IND_CALL;
560                         break;
561                 case 4:
562                 case 5:
563                         ret = X86_BR_JMP;
564                         break;
565                 }
566                 break;
567         default:
568                 ret = X86_BR_NONE;
569         }
570         /*
571          * interrupts, traps, faults (and thus ring transition) may
 572          * occur on any instruction. Thus, to classify them correctly,
573          * we need to first look at the from and to priv levels. If they
574          * are different and to is in the kernel, then it indicates
575          * a ring transition. If the from instruction is not a ring
 576          * transition instr (syscall, sysenter, int), then it means
 577          * it was an irq, trap or fault.
578          *
579          * we have no way of detecting kernel to kernel faults.
580          */
581         if (from_plm == X86_BR_USER && to_plm == X86_BR_KERNEL
582             && ret != X86_BR_SYSCALL && ret != X86_BR_INT)
583                 ret = X86_BR_IRQ;
584 
585         /*
586          * branch priv level determined by target as
587          * is done by HW when LBR_SELECT is implemented
588          */
589         if (ret != X86_BR_NONE)
590                 ret |= to_plm;
591 
592         return ret;
593 }
594 
595 /*
596  * implement actual branch filter based on user demand.
597  * Hardware may not exactly satisfy that request, thus
598  * we need to inspect opcodes. Mismatched branches are
599  * discarded. Therefore, the number of branches returned
 600  * in a PERF_SAMPLE_BRANCH_STACK sample may vary.
601  */
602 static void
603 intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
604 {
605         u64 from, to;
606         int br_sel = cpuc->br_sel;
607         int i, j, type;
608         bool compress = false;
609 
610         /* if sampling all branches, then nothing to filter */
611         if ((br_sel & X86_BR_ALL) == X86_BR_ALL)
612                 return;
613 
614         for (i = 0; i < cpuc->lbr_stack.nr; i++) {
615 
616                 from = cpuc->lbr_entries[i].from;
617                 to = cpuc->lbr_entries[i].to;
618 
619                 type = branch_type(from, to, cpuc->lbr_entries[i].abort);
620                 if (type != X86_BR_NONE && (br_sel & X86_BR_ANYTX)) {
621                         if (cpuc->lbr_entries[i].in_tx)
622                                 type |= X86_BR_IN_TX;
623                         else
624                                 type |= X86_BR_NO_TX;
625                 }
626 
627                 /* if type does not correspond, then discard */
628                 if (type == X86_BR_NONE || (br_sel & type) != type) {
629                         cpuc->lbr_entries[i].from = 0;
630                         compress = true;
631                 }
632         }
633 
634         if (!compress)
635                 return;
636 
637         /* remove all entries with from=0 */
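            /*
             * In-place compaction: when slot i is empty, the tail is shifted
             * down one place and nr is decremented; i is only advanced once
             * the slot holds a valid entry, since the shifted-in entry may
             * be empty too.
             */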
638         for (i = 0; i < cpuc->lbr_stack.nr; ) {
639                 if (!cpuc->lbr_entries[i].from) {
640                         j = i;
641                         while (++j < cpuc->lbr_stack.nr)
642                                 cpuc->lbr_entries[j-1] = cpuc->lbr_entries[j];
643                         cpuc->lbr_stack.nr--;
644                         if (!cpuc->lbr_entries[i].from)
645                                 continue;
646                 }
647                 i++;
648         }
649 }
650 
651 /*
652  * Map interface branch filters onto LBR filters
653  */
654 static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
655         [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
656         [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
657         [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
658         [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
659         [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_REL_JMP
660                                         | LBR_IND_JMP | LBR_FAR,
661         /*
662          * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
663          */
664         [PERF_SAMPLE_BRANCH_ANY_CALL] =
665          LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
666         /*
667          * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
668          */
669         [PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
670 };
671 
672 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
673         [PERF_SAMPLE_BRANCH_ANY]        = LBR_ANY,
674         [PERF_SAMPLE_BRANCH_USER]       = LBR_USER,
675         [PERF_SAMPLE_BRANCH_KERNEL]     = LBR_KERNEL,
676         [PERF_SAMPLE_BRANCH_HV]         = LBR_IGN,
677         [PERF_SAMPLE_BRANCH_ANY_RETURN] = LBR_RETURN | LBR_FAR,
678         [PERF_SAMPLE_BRANCH_ANY_CALL]   = LBR_REL_CALL | LBR_IND_CALL
679                                         | LBR_FAR,
680         [PERF_SAMPLE_BRANCH_IND_CALL]   = LBR_IND_CALL,
681 };
682 
683 /* core */
684 void intel_pmu_lbr_init_core(void)
685 {
686         x86_pmu.lbr_nr     = 4;
687         x86_pmu.lbr_tos    = MSR_LBR_TOS;
688         x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
689         x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
690 
691         /*
692          * SW branch filter usage:
693          * - compensate for lack of HW filter
694          */
695         pr_cont("4-deep LBR, ");
696 }
697 
698 /* nehalem/westmere */
699 void intel_pmu_lbr_init_nhm(void)
700 {
701         x86_pmu.lbr_nr     = 16;
702         x86_pmu.lbr_tos    = MSR_LBR_TOS;
703         x86_pmu.lbr_from   = MSR_LBR_NHM_FROM;
704         x86_pmu.lbr_to     = MSR_LBR_NHM_TO;
705 
706         x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
707         x86_pmu.lbr_sel_map  = nhm_lbr_sel_map;
708 
709         /*
710          * SW branch filter usage:
 711          * - work around the LBR_SEL errata (see above)
 712          * - support syscall, sysret capture.
 713          *   That requires LBR_FAR, but that means far
 714          *   jmps need to be filtered out
715          */
716         pr_cont("16-deep LBR, ");
717 }
718 
719 /* sandy bridge */
720 void intel_pmu_lbr_init_snb(void)
721 {
722         x86_pmu.lbr_nr   = 16;
723         x86_pmu.lbr_tos  = MSR_LBR_TOS;
724         x86_pmu.lbr_from = MSR_LBR_NHM_FROM;
725         x86_pmu.lbr_to   = MSR_LBR_NHM_TO;
726 
727         x86_pmu.lbr_sel_mask = LBR_SEL_MASK;
728         x86_pmu.lbr_sel_map  = snb_lbr_sel_map;
729 
730         /*
731          * SW branch filter usage:
732          * - support syscall, sysret capture.
 733          *   That requires LBR_FAR, but that means far
 734          *   jmps need to be filtered out
735          */
736         pr_cont("16-deep LBR, ");
737 }
738 
739 /* atom */
740 void intel_pmu_lbr_init_atom(void)
741 {
742         /*
 743          * only models starting at stepping 10 seem
744          * to have an operational LBR which can freeze
745          * on PMU interrupt
746          */
747         if (boot_cpu_data.x86_model == 28
748             && boot_cpu_data.x86_mask < 10) {
749                 pr_cont("LBR disabled due to erratum");
750                 return;
751         }
752 
753         x86_pmu.lbr_nr     = 8;
754         x86_pmu.lbr_tos    = MSR_LBR_TOS;
755         x86_pmu.lbr_from   = MSR_LBR_CORE_FROM;
756         x86_pmu.lbr_to     = MSR_LBR_CORE_TO;
757 
758         /*
759          * SW branch filter usage:
760          * - compensate for lack of HW filter
761          */
762         pr_cont("8-deep LBR, ");
763 }
764 
