
TOMOYO Linux Cross Reference
Linux/arch/x86/events/intel/core.c


  1 // SPDX-License-Identifier: GPL-2.0-only
  2 /*
  3  * Per core/cpu state
  4  *
  5  * Used to coordinate shared registers between HT threads or
  6  * among events on a single PMU.
  7  */
  8 
  9 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 10 
 11 #include <linux/stddef.h>
 12 #include <linux/types.h>
 13 #include <linux/init.h>
 14 #include <linux/slab.h>
 15 #include <linux/export.h>
 16 #include <linux/nmi.h>
 17 
 18 #include <asm/cpufeature.h>
 19 #include <asm/hardirq.h>
 20 #include <asm/intel-family.h>
 21 #include <asm/apic.h>
 22 #include <asm/cpu_device_id.h>
 23 
 24 #include "../perf_event.h"
 25 
 26 /*
 27  * Intel PerfMon, used on Core and later.
 28  */
 29 static u64 intel_perfmon_event_map[PERF_COUNT_HW_MAX] __read_mostly =
 30 {
 31         [PERF_COUNT_HW_CPU_CYCLES]              = 0x003c,
 32         [PERF_COUNT_HW_INSTRUCTIONS]            = 0x00c0,
 33         [PERF_COUNT_HW_CACHE_REFERENCES]        = 0x4f2e,
 34         [PERF_COUNT_HW_CACHE_MISSES]            = 0x412e,
 35         [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]     = 0x00c4,
 36         [PERF_COUNT_HW_BRANCH_MISSES]           = 0x00c5,
 37         [PERF_COUNT_HW_BUS_CYCLES]              = 0x013c,
 38         [PERF_COUNT_HW_REF_CPU_CYCLES]          = 0x0300, /* pseudo-encoding */
 39 };
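     /*
      * Each entry above is a raw PERFEVTSEL encoding: event select in the
      * low byte, unit mask in bits 8-15.  For example 0x412e is event 0x2e
      * with umask 0x41 (LLC misses) and 0x4f2e is the same event with
      * umask 0x4f (LLC references).  0x0300 is not a real eventsel/umask
      * pair; it is a pseudo-encoding satisfied only by fixed counter 2
      * (see the FIXED_EVENT_CONSTRAINT(0x0300, 2) entries below).
      */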
 40 
 41 static struct event_constraint intel_core_event_constraints[] __read_mostly =
 42 {
 43         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 44         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
 45         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
 46         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
 47         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
 48         INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */
 49         EVENT_CONSTRAINT_END
 50 };
 51 
 52 static struct event_constraint intel_core2_event_constraints[] __read_mostly =
 53 {
 54         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 55         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 56         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 57         INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */
 58         INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
 59         INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */
 60         INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */
 61         INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */
 62         INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */
 63         INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */
 64         INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */
 65         INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */
 66         INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */
 67         EVENT_CONSTRAINT_END
 68 };
 69 
 70 static struct event_constraint intel_nehalem_event_constraints[] __read_mostly =
 71 {
 72         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 73         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 74         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 75         INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 76         INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 77         INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
 78         INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
 79         INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
 80         INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
 81         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
 82         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
 83         EVENT_CONSTRAINT_END
 84 };
 85 
 86 static struct extra_reg intel_nehalem_extra_regs[] __read_mostly =
 87 {
 88         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
 89         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
 90         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
 91         EVENT_EXTRA_END
 92 };
 93 
 94 static struct event_constraint intel_westmere_event_constraints[] __read_mostly =
 95 {
 96         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
 97         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
 98         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
 99         INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
100         INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
101         INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
102         INTEL_EVENT_CONSTRAINT(0xb3, 0x1), /* SNOOPQ_REQUEST_OUTSTANDING */
103         EVENT_CONSTRAINT_END
104 };
105 
106 static struct event_constraint intel_snb_event_constraints[] __read_mostly =
107 {
108         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
109         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
110         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
111         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
112         INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
113         INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
114         INTEL_UEVENT_CONSTRAINT(0x06a3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
115         INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
116         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
117         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
118         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_DISPATCH */
119         INTEL_UEVENT_CONSTRAINT(0x02a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
120 
121         /*
122          * When HT is off these events can only run on the bottom 4 counters
123          * When HT is on, they are impacted by the HT bug and require EXCL access
124          */
125         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
126         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
127         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
128         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
129 
130         EVENT_CONSTRAINT_END
131 };
132 
133 static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
134 {
135         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
136         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
137         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
138         INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
 139         INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMPTY */
140         INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
141         INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
142         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
143         INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
144         INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
145         INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
146         INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4), /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
147         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
148 
149         /*
150          * When HT is off these events can only run on the bottom 4 counters
151          * When HT is on, they are impacted by the HT bug and require EXCL access
152          */
153         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
154         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
155         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
156         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
157 
158         EVENT_CONSTRAINT_END
159 };
160 
161 static struct extra_reg intel_westmere_extra_regs[] __read_mostly =
162 {
163         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
164         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffff, RSP_0),
165         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0xffff, RSP_1),
166         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x100b),
167         EVENT_EXTRA_END
168 };
169 
170 static struct event_constraint intel_v1_event_constraints[] __read_mostly =
171 {
172         EVENT_CONSTRAINT_END
173 };
174 
175 static struct event_constraint intel_gen_event_constraints[] __read_mostly =
176 {
177         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
178         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
179         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
180         EVENT_CONSTRAINT_END
181 };
182 
183 static struct event_constraint intel_slm_event_constraints[] __read_mostly =
184 {
185         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
186         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
187         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* pseudo CPU_CLK_UNHALTED.REF */
188         EVENT_CONSTRAINT_END
189 };
190 
191 static struct event_constraint intel_skl_event_constraints[] = {
192         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
193         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
194         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
195         INTEL_UEVENT_CONSTRAINT(0x1c0, 0x2),    /* INST_RETIRED.PREC_DIST */
196 
197         /*
198          * when HT is off, these can only run on the bottom 4 counters
199          */
200         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
201         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
202         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
203         INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
204         INTEL_EVENT_CONSTRAINT(0xc6, 0xf),      /* FRONTEND_RETIRED.* */
205 
206         EVENT_CONSTRAINT_END
207 };
208 
209 static struct extra_reg intel_knl_extra_regs[] __read_mostly = {
210         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x799ffbb6e7ull, RSP_0),
211         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x399ffbffe7ull, RSP_1),
212         EVENT_EXTRA_END
213 };
214 
215 static struct extra_reg intel_snb_extra_regs[] __read_mostly = {
216         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
217         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3f807f8fffull, RSP_0),
218         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3f807f8fffull, RSP_1),
219         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
220         EVENT_EXTRA_END
221 };
222 
223 static struct extra_reg intel_snbep_extra_regs[] __read_mostly = {
224         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
225         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
226         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
227         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
228         EVENT_EXTRA_END
229 };
230 
231 static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
232         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
233         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
234         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
235         /*
 236          * Note: the low 8 bits of the eventsel code do not form a contiguous
 237          * field and contain some bits that #GP when set; these are masked out.
238          */
239         INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
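             /* 0x7fff17 keeps bits 0-2, 4 and 8-22; the gaps are the #GPing bits noted above */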
240         EVENT_EXTRA_END
241 };
242 
243 static struct event_constraint intel_icl_event_constraints[] = {
244         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
245         INTEL_UEVENT_CONSTRAINT(0x1c0, 0),      /* INST_RETIRED.PREC_DIST */
246         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
247         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
248         FIXED_EVENT_CONSTRAINT(0x0400, 3),      /* SLOTS */
249         INTEL_EVENT_CONSTRAINT_RANGE(0x03, 0x0a, 0xf),
250         INTEL_EVENT_CONSTRAINT_RANGE(0x1f, 0x28, 0xf),
251         INTEL_EVENT_CONSTRAINT(0x32, 0xf),      /* SW_PREFETCH_ACCESS.* */
252         INTEL_EVENT_CONSTRAINT_RANGE(0x48, 0x54, 0xf),
253         INTEL_EVENT_CONSTRAINT_RANGE(0x60, 0x8b, 0xf),
254         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_TOTAL */
255         INTEL_UEVENT_CONSTRAINT(0x10a3, 0xff),  /* CYCLE_ACTIVITY.STALLS_MEM_ANY */
256         INTEL_EVENT_CONSTRAINT(0xa3, 0xf),      /* CYCLE_ACTIVITY.* */
257         INTEL_EVENT_CONSTRAINT_RANGE(0xa8, 0xb0, 0xf),
258         INTEL_EVENT_CONSTRAINT_RANGE(0xb7, 0xbd, 0xf),
259         INTEL_EVENT_CONSTRAINT_RANGE(0xd0, 0xe6, 0xf),
260         INTEL_EVENT_CONSTRAINT_RANGE(0xf0, 0xf4, 0xf),
261         EVENT_CONSTRAINT_END
262 };
263 
264 static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
265         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
266         INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
267         INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
268         INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
269         EVENT_EXTRA_END
270 };
271 
272 EVENT_ATTR_STR(mem-loads,       mem_ld_nhm,     "event=0x0b,umask=0x10,ldlat=3");
273 EVENT_ATTR_STR(mem-loads,       mem_ld_snb,     "event=0xcd,umask=0x1,ldlat=3");
274 EVENT_ATTR_STR(mem-stores,      mem_st_snb,     "event=0xcd,umask=0x2");
275 
276 static struct attribute *nhm_mem_events_attrs[] = {
277         EVENT_PTR(mem_ld_nhm),
278         NULL,
279 };
280 
281 /*
282  * topdown events for Intel Core CPUs.
283  *
 284  * The events are all measured in slots, where a slot is an issue
 285  * opportunity in the 4-wide pipeline. Some events are already reported
 286  * in slots; for cycle events we multiply by the pipeline width (4).
 287  *
 288  * With Hyper-Threading on, topdown metrics are either summed or averaged
 289  * between the threads of a core: (count_t0 + count_t1).
 290  *
 291  * For the average case the metric is always scaled to the pipeline width,
 292  * so we use a factor of 2: ((count_t0 + count_t1) / 2) * 4.
293  */
294 
295 EVENT_ATTR_STR_HT(topdown-total-slots, td_total_slots,
296         "event=0x3c,umask=0x0",                 /* cpu_clk_unhalted.thread */
297         "event=0x3c,umask=0x0,any=1");          /* cpu_clk_unhalted.thread_any */
298 EVENT_ATTR_STR_HT(topdown-total-slots.scale, td_total_slots_scale, "4", "2");
299 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued,
300         "event=0xe,umask=0x1");                 /* uops_issued.any */
301 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired,
302         "event=0xc2,umask=0x2");                /* uops_retired.retire_slots */
303 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles,
304         "event=0x9c,umask=0x1");                /* idq_uops_not_delivered_core */
305 EVENT_ATTR_STR_HT(topdown-recovery-bubbles, td_recovery_bubbles,
306         "event=0xd,umask=0x3,cmask=1",          /* int_misc.recovery_cycles */
307         "event=0xd,umask=0x3,cmask=1,any=1");   /* int_misc.recovery_cycles_any */
308 EVENT_ATTR_STR_HT(topdown-recovery-bubbles.scale, td_recovery_bubbles_scale,
309         "4", "2");
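     /*
      * Worked example of the .scale attributes above: with HT off,
      * topdown-total-slots = cpu_clk_unhalted.thread * 4 (the pipeline
      * width).  With HT on the counts of both threads are combined, and
      * the scale of 2 gives (count_t0 + count_t1) * 2, which equals the
      * per-thread average scaled by the pipeline width:
      * ((count_t0 + count_t1) / 2) * 4.
      */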
310 
311 static struct attribute *snb_events_attrs[] = {
312         EVENT_PTR(td_slots_issued),
313         EVENT_PTR(td_slots_retired),
314         EVENT_PTR(td_fetch_bubbles),
315         EVENT_PTR(td_total_slots),
316         EVENT_PTR(td_total_slots_scale),
317         EVENT_PTR(td_recovery_bubbles),
318         EVENT_PTR(td_recovery_bubbles_scale),
319         NULL,
320 };
321 
322 static struct attribute *snb_mem_events_attrs[] = {
323         EVENT_PTR(mem_ld_snb),
324         EVENT_PTR(mem_st_snb),
325         NULL,
326 };
327 
328 static struct event_constraint intel_hsw_event_constraints[] = {
329         FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
330         FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
331         FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
332         INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
333         INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
334         INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
335         /* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
336         INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
337         /* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
338         INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
339         /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
340         INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
341 
342         /*
343          * When HT is off these events can only run on the bottom 4 counters
344          * When HT is on, they are impacted by the HT bug and require EXCL access
345          */
346         INTEL_EXCLEVT_CONSTRAINT(0xd0, 0xf), /* MEM_UOPS_RETIRED.* */
347         INTEL_EXCLEVT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */
348         INTEL_EXCLEVT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
349         INTEL_EXCLEVT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
350 
351         EVENT_CONSTRAINT_END
352 };
353 
354 static struct event_constraint intel_bdw_event_constraints[] = {
355         FIXED_EVENT_CONSTRAINT(0x00c0, 0),      /* INST_RETIRED.ANY */
356         FIXED_EVENT_CONSTRAINT(0x003c, 1),      /* CPU_CLK_UNHALTED.CORE */
357         FIXED_EVENT_CONSTRAINT(0x0300, 2),      /* CPU_CLK_UNHALTED.REF */
358         INTEL_UEVENT_CONSTRAINT(0x148, 0x4),    /* L1D_PEND_MISS.PENDING */
359         INTEL_UBIT_EVENT_CONSTRAINT(0x8a3, 0x4),        /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
360         /*
361          * when HT is off, these can only run on the bottom 4 counters
362          */
363         INTEL_EVENT_CONSTRAINT(0xd0, 0xf),      /* MEM_INST_RETIRED.* */
364         INTEL_EVENT_CONSTRAINT(0xd1, 0xf),      /* MEM_LOAD_RETIRED.* */
365         INTEL_EVENT_CONSTRAINT(0xd2, 0xf),      /* MEM_LOAD_L3_HIT_RETIRED.* */
366         INTEL_EVENT_CONSTRAINT(0xcd, 0xf),      /* MEM_TRANS_RETIRED.* */
367         EVENT_CONSTRAINT_END
368 };
369 
370 static u64 intel_pmu_event_map(int hw_event)
371 {
372         return intel_perfmon_event_map[hw_event];
373 }
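     /*
      * Simple table lookup from the generic perf hardware event id to the
      * raw Intel encoding, e.g. PERF_COUNT_HW_CPU_CYCLES -> 0x003c
      * (unhalted core cycles).
      */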
374 
375 /*
376  * Notes on the events:
377  * - data reads do not include code reads (comparable to earlier tables)
378  * - data counts include speculative execution (except L1 write, dtlb, bpu)
379  * - remote node access includes remote memory, remote cache, remote mmio.
380  * - prefetches are not included in the counts.
381  * - icache miss does not include decoded icache
382  */
383 
384 #define SKL_DEMAND_DATA_RD              BIT_ULL(0)
385 #define SKL_DEMAND_RFO                  BIT_ULL(1)
386 #define SKL_ANY_RESPONSE                BIT_ULL(16)
387 #define SKL_SUPPLIER_NONE               BIT_ULL(17)
388 #define SKL_L3_MISS_LOCAL_DRAM          BIT_ULL(26)
389 #define SKL_L3_MISS_REMOTE_HOP0_DRAM    BIT_ULL(27)
390 #define SKL_L3_MISS_REMOTE_HOP1_DRAM    BIT_ULL(28)
391 #define SKL_L3_MISS_REMOTE_HOP2P_DRAM   BIT_ULL(29)
392 #define SKL_L3_MISS                     (SKL_L3_MISS_LOCAL_DRAM| \
393                                          SKL_L3_MISS_REMOTE_HOP0_DRAM| \
394                                          SKL_L3_MISS_REMOTE_HOP1_DRAM| \
395                                          SKL_L3_MISS_REMOTE_HOP2P_DRAM)
396 #define SKL_SPL_HIT                     BIT_ULL(30)
397 #define SKL_SNOOP_NONE                  BIT_ULL(31)
398 #define SKL_SNOOP_NOT_NEEDED            BIT_ULL(32)
399 #define SKL_SNOOP_MISS                  BIT_ULL(33)
400 #define SKL_SNOOP_HIT_NO_FWD            BIT_ULL(34)
401 #define SKL_SNOOP_HIT_WITH_FWD          BIT_ULL(35)
402 #define SKL_SNOOP_HITM                  BIT_ULL(36)
403 #define SKL_SNOOP_NON_DRAM              BIT_ULL(37)
404 #define SKL_ANY_SNOOP                   (SKL_SPL_HIT|SKL_SNOOP_NONE| \
405                                          SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
406                                          SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
407                                          SKL_SNOOP_HITM|SKL_SNOOP_NON_DRAM)
408 #define SKL_DEMAND_READ                 SKL_DEMAND_DATA_RD
409 #define SKL_SNOOP_DRAM                  (SKL_SNOOP_NONE| \
410                                          SKL_SNOOP_NOT_NEEDED|SKL_SNOOP_MISS| \
411                                          SKL_SNOOP_HIT_NO_FWD|SKL_SNOOP_HIT_WITH_FWD| \
412                                          SKL_SNOOP_HITM|SKL_SPL_HIT)
413 #define SKL_DEMAND_WRITE                SKL_DEMAND_RFO
414 #define SKL_LLC_ACCESS                  SKL_ANY_RESPONSE
415 #define SKL_L3_MISS_REMOTE              (SKL_L3_MISS_REMOTE_HOP0_DRAM| \
416                                          SKL_L3_MISS_REMOTE_HOP1_DRAM| \
417                                          SKL_L3_MISS_REMOTE_HOP2P_DRAM)
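     /*
      * The SKL_* bits above are OR-ed together to form MSR_OFFCORE_RSP_x
      * values for the OFFCORE_RESPONSE events (0x01b7/0x01bb).  For
      * example, an LL read access is encoded below as
      * SKL_DEMAND_READ|SKL_LLC_ACCESS|SKL_ANY_SNOOP: demand data reads,
      * any response, any snoop outcome.
      */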
418 
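     /*
      * In the hw_cache_event_ids tables a 0 entry means no event exists
      * for that op/result combination (opening it fails with -ENOENT),
      * while -1 marks a combination that is not meaningful on this
      * hardware (-EINVAL).
      */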
419 static __initconst const u64 skl_hw_cache_event_ids
420                                 [PERF_COUNT_HW_CACHE_MAX]
421                                 [PERF_COUNT_HW_CACHE_OP_MAX]
422                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
423 {
424  [ C(L1D ) ] = {
425         [ C(OP_READ) ] = {
426                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
427                 [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
428         },
429         [ C(OP_WRITE) ] = {
430                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
431                 [ C(RESULT_MISS)   ] = 0x0,
432         },
433         [ C(OP_PREFETCH) ] = {
434                 [ C(RESULT_ACCESS) ] = 0x0,
435                 [ C(RESULT_MISS)   ] = 0x0,
436         },
437  },
438  [ C(L1I ) ] = {
439         [ C(OP_READ) ] = {
440                 [ C(RESULT_ACCESS) ] = 0x0,
441                 [ C(RESULT_MISS)   ] = 0x283,   /* ICACHE_64B.MISS */
442         },
443         [ C(OP_WRITE) ] = {
444                 [ C(RESULT_ACCESS) ] = -1,
445                 [ C(RESULT_MISS)   ] = -1,
446         },
447         [ C(OP_PREFETCH) ] = {
448                 [ C(RESULT_ACCESS) ] = 0x0,
449                 [ C(RESULT_MISS)   ] = 0x0,
450         },
451  },
452  [ C(LL  ) ] = {
453         [ C(OP_READ) ] = {
454                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
455                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
456         },
457         [ C(OP_WRITE) ] = {
458                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
459                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
460         },
461         [ C(OP_PREFETCH) ] = {
462                 [ C(RESULT_ACCESS) ] = 0x0,
463                 [ C(RESULT_MISS)   ] = 0x0,
464         },
465  },
466  [ C(DTLB) ] = {
467         [ C(OP_READ) ] = {
468                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_INST_RETIRED.ALL_LOADS */
469                 [ C(RESULT_MISS)   ] = 0xe08,   /* DTLB_LOAD_MISSES.WALK_COMPLETED */
470         },
471         [ C(OP_WRITE) ] = {
472                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_INST_RETIRED.ALL_STORES */
473                 [ C(RESULT_MISS)   ] = 0xe49,   /* DTLB_STORE_MISSES.WALK_COMPLETED */
474         },
475         [ C(OP_PREFETCH) ] = {
476                 [ C(RESULT_ACCESS) ] = 0x0,
477                 [ C(RESULT_MISS)   ] = 0x0,
478         },
479  },
480  [ C(ITLB) ] = {
481         [ C(OP_READ) ] = {
482                 [ C(RESULT_ACCESS) ] = 0x2085,  /* ITLB_MISSES.STLB_HIT */
483                 [ C(RESULT_MISS)   ] = 0xe85,   /* ITLB_MISSES.WALK_COMPLETED */
484         },
485         [ C(OP_WRITE) ] = {
486                 [ C(RESULT_ACCESS) ] = -1,
487                 [ C(RESULT_MISS)   ] = -1,
488         },
489         [ C(OP_PREFETCH) ] = {
490                 [ C(RESULT_ACCESS) ] = -1,
491                 [ C(RESULT_MISS)   ] = -1,
492         },
493  },
494  [ C(BPU ) ] = {
495         [ C(OP_READ) ] = {
496                 [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
497                 [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
498         },
499         [ C(OP_WRITE) ] = {
500                 [ C(RESULT_ACCESS) ] = -1,
501                 [ C(RESULT_MISS)   ] = -1,
502         },
503         [ C(OP_PREFETCH) ] = {
504                 [ C(RESULT_ACCESS) ] = -1,
505                 [ C(RESULT_MISS)   ] = -1,
506         },
507  },
508  [ C(NODE) ] = {
509         [ C(OP_READ) ] = {
510                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
511                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
512         },
513         [ C(OP_WRITE) ] = {
514                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
515                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
516         },
517         [ C(OP_PREFETCH) ] = {
518                 [ C(RESULT_ACCESS) ] = 0x0,
519                 [ C(RESULT_MISS)   ] = 0x0,
520         },
521  },
522 };
523 
524 static __initconst const u64 skl_hw_cache_extra_regs
525                                 [PERF_COUNT_HW_CACHE_MAX]
526                                 [PERF_COUNT_HW_CACHE_OP_MAX]
527                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
528 {
529  [ C(LL  ) ] = {
530         [ C(OP_READ) ] = {
531                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
532                                        SKL_LLC_ACCESS|SKL_ANY_SNOOP,
533                 [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
534                                        SKL_L3_MISS|SKL_ANY_SNOOP|
535                                        SKL_SUPPLIER_NONE,
536         },
537         [ C(OP_WRITE) ] = {
538                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
539                                        SKL_LLC_ACCESS|SKL_ANY_SNOOP,
540                 [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
541                                        SKL_L3_MISS|SKL_ANY_SNOOP|
542                                        SKL_SUPPLIER_NONE,
543         },
544         [ C(OP_PREFETCH) ] = {
545                 [ C(RESULT_ACCESS) ] = 0x0,
546                 [ C(RESULT_MISS)   ] = 0x0,
547         },
548  },
549  [ C(NODE) ] = {
550         [ C(OP_READ) ] = {
551                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_READ|
552                                        SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
553                 [ C(RESULT_MISS)   ] = SKL_DEMAND_READ|
554                                        SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
555         },
556         [ C(OP_WRITE) ] = {
557                 [ C(RESULT_ACCESS) ] = SKL_DEMAND_WRITE|
558                                        SKL_L3_MISS_LOCAL_DRAM|SKL_SNOOP_DRAM,
559                 [ C(RESULT_MISS)   ] = SKL_DEMAND_WRITE|
560                                        SKL_L3_MISS_REMOTE|SKL_SNOOP_DRAM,
561         },
562         [ C(OP_PREFETCH) ] = {
563                 [ C(RESULT_ACCESS) ] = 0x0,
564                 [ C(RESULT_MISS)   ] = 0x0,
565         },
566  },
567 };
568 
569 #define SNB_DMND_DATA_RD        (1ULL << 0)
570 #define SNB_DMND_RFO            (1ULL << 1)
571 #define SNB_DMND_IFETCH         (1ULL << 2)
572 #define SNB_DMND_WB             (1ULL << 3)
573 #define SNB_PF_DATA_RD          (1ULL << 4)
574 #define SNB_PF_RFO              (1ULL << 5)
575 #define SNB_PF_IFETCH           (1ULL << 6)
576 #define SNB_LLC_DATA_RD         (1ULL << 7)
577 #define SNB_LLC_RFO             (1ULL << 8)
578 #define SNB_LLC_IFETCH          (1ULL << 9)
579 #define SNB_BUS_LOCKS           (1ULL << 10)
580 #define SNB_STRM_ST             (1ULL << 11)
581 #define SNB_OTHER               (1ULL << 15)
582 #define SNB_RESP_ANY            (1ULL << 16)
583 #define SNB_NO_SUPP             (1ULL << 17)
584 #define SNB_LLC_HITM            (1ULL << 18)
585 #define SNB_LLC_HITE            (1ULL << 19)
586 #define SNB_LLC_HITS            (1ULL << 20)
587 #define SNB_LLC_HITF            (1ULL << 21)
588 #define SNB_LOCAL               (1ULL << 22)
589 #define SNB_REMOTE              (0xffULL << 23)
590 #define SNB_SNP_NONE            (1ULL << 31)
591 #define SNB_SNP_NOT_NEEDED      (1ULL << 32)
592 #define SNB_SNP_MISS            (1ULL << 33)
593 #define SNB_NO_FWD              (1ULL << 34)
594 #define SNB_SNP_FWD             (1ULL << 35)
595 #define SNB_HITM                (1ULL << 36)
596 #define SNB_NON_DRAM            (1ULL << 37)
597 
598 #define SNB_DMND_READ           (SNB_DMND_DATA_RD|SNB_LLC_DATA_RD)
599 #define SNB_DMND_WRITE          (SNB_DMND_RFO|SNB_LLC_RFO)
600 #define SNB_DMND_PREFETCH       (SNB_PF_DATA_RD|SNB_PF_RFO)
601 
602 #define SNB_SNP_ANY             (SNB_SNP_NONE|SNB_SNP_NOT_NEEDED| \
603                                  SNB_SNP_MISS|SNB_NO_FWD|SNB_SNP_FWD| \
604                                  SNB_HITM)
605 
606 #define SNB_DRAM_ANY            (SNB_LOCAL|SNB_REMOTE|SNB_SNP_ANY)
607 #define SNB_DRAM_REMOTE         (SNB_REMOTE|SNB_SNP_ANY)
608 
609 #define SNB_L3_ACCESS           SNB_RESP_ANY
610 #define SNB_L3_MISS             (SNB_DRAM_ANY|SNB_NON_DRAM)
611 
612 static __initconst const u64 snb_hw_cache_extra_regs
613                                 [PERF_COUNT_HW_CACHE_MAX]
614                                 [PERF_COUNT_HW_CACHE_OP_MAX]
615                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
616 {
617  [ C(LL  ) ] = {
618         [ C(OP_READ) ] = {
619                 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_L3_ACCESS,
620                 [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_L3_MISS,
621         },
622         [ C(OP_WRITE) ] = {
623                 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_L3_ACCESS,
624                 [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_L3_MISS,
625         },
626         [ C(OP_PREFETCH) ] = {
627                 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_L3_ACCESS,
628                 [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_L3_MISS,
629         },
630  },
631  [ C(NODE) ] = {
632         [ C(OP_READ) ] = {
633                 [ C(RESULT_ACCESS) ] = SNB_DMND_READ|SNB_DRAM_ANY,
634                 [ C(RESULT_MISS)   ] = SNB_DMND_READ|SNB_DRAM_REMOTE,
635         },
636         [ C(OP_WRITE) ] = {
637                 [ C(RESULT_ACCESS) ] = SNB_DMND_WRITE|SNB_DRAM_ANY,
638                 [ C(RESULT_MISS)   ] = SNB_DMND_WRITE|SNB_DRAM_REMOTE,
639         },
640         [ C(OP_PREFETCH) ] = {
641                 [ C(RESULT_ACCESS) ] = SNB_DMND_PREFETCH|SNB_DRAM_ANY,
642                 [ C(RESULT_MISS)   ] = SNB_DMND_PREFETCH|SNB_DRAM_REMOTE,
643         },
644  },
645 };
646 
647 static __initconst const u64 snb_hw_cache_event_ids
648                                 [PERF_COUNT_HW_CACHE_MAX]
649                                 [PERF_COUNT_HW_CACHE_OP_MAX]
650                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
651 {
652  [ C(L1D) ] = {
653         [ C(OP_READ) ] = {
654                 [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS        */
655                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPLACEMENT              */
656         },
657         [ C(OP_WRITE) ] = {
658                 [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES       */
659                 [ C(RESULT_MISS)   ] = 0x0851, /* L1D.ALL_M_REPLACEMENT        */
660         },
661         [ C(OP_PREFETCH) ] = {
662                 [ C(RESULT_ACCESS) ] = 0x0,
663                 [ C(RESULT_MISS)   ] = 0x024e, /* HW_PRE_REQ.DL1_MISS          */
664         },
665  },
666  [ C(L1I ) ] = {
667         [ C(OP_READ) ] = {
668                 [ C(RESULT_ACCESS) ] = 0x0,
669                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
670         },
671         [ C(OP_WRITE) ] = {
672                 [ C(RESULT_ACCESS) ] = -1,
673                 [ C(RESULT_MISS)   ] = -1,
674         },
675         [ C(OP_PREFETCH) ] = {
676                 [ C(RESULT_ACCESS) ] = 0x0,
677                 [ C(RESULT_MISS)   ] = 0x0,
678         },
679  },
680  [ C(LL  ) ] = {
681         [ C(OP_READ) ] = {
682                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
683                 [ C(RESULT_ACCESS) ] = 0x01b7,
684                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
685                 [ C(RESULT_MISS)   ] = 0x01b7,
686         },
687         [ C(OP_WRITE) ] = {
688                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
689                 [ C(RESULT_ACCESS) ] = 0x01b7,
690                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
691                 [ C(RESULT_MISS)   ] = 0x01b7,
692         },
693         [ C(OP_PREFETCH) ] = {
694                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
695                 [ C(RESULT_ACCESS) ] = 0x01b7,
696                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
697                 [ C(RESULT_MISS)   ] = 0x01b7,
698         },
699  },
700  [ C(DTLB) ] = {
701         [ C(OP_READ) ] = {
702                 [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ALL_LOADS */
703                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
704         },
705         [ C(OP_WRITE) ] = {
706                 [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ALL_STORES */
707                 [ C(RESULT_MISS)   ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
708         },
709         [ C(OP_PREFETCH) ] = {
710                 [ C(RESULT_ACCESS) ] = 0x0,
711                 [ C(RESULT_MISS)   ] = 0x0,
712         },
713  },
714  [ C(ITLB) ] = {
715         [ C(OP_READ) ] = {
716                 [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT         */
717                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK    */
718         },
719         [ C(OP_WRITE) ] = {
720                 [ C(RESULT_ACCESS) ] = -1,
721                 [ C(RESULT_MISS)   ] = -1,
722         },
723         [ C(OP_PREFETCH) ] = {
724                 [ C(RESULT_ACCESS) ] = -1,
725                 [ C(RESULT_MISS)   ] = -1,
726         },
727  },
728  [ C(BPU ) ] = {
729         [ C(OP_READ) ] = {
730                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
731                 [ C(RESULT_MISS)   ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
732         },
733         [ C(OP_WRITE) ] = {
734                 [ C(RESULT_ACCESS) ] = -1,
735                 [ C(RESULT_MISS)   ] = -1,
736         },
737         [ C(OP_PREFETCH) ] = {
738                 [ C(RESULT_ACCESS) ] = -1,
739                 [ C(RESULT_MISS)   ] = -1,
740         },
741  },
742  [ C(NODE) ] = {
743         [ C(OP_READ) ] = {
744                 [ C(RESULT_ACCESS) ] = 0x01b7,
745                 [ C(RESULT_MISS)   ] = 0x01b7,
746         },
747         [ C(OP_WRITE) ] = {
748                 [ C(RESULT_ACCESS) ] = 0x01b7,
749                 [ C(RESULT_MISS)   ] = 0x01b7,
750         },
751         [ C(OP_PREFETCH) ] = {
752                 [ C(RESULT_ACCESS) ] = 0x01b7,
753                 [ C(RESULT_MISS)   ] = 0x01b7,
754         },
755  },
756 
757 };
758 
759 /*
760  * Notes on the events:
761  * - data reads do not include code reads (comparable to earlier tables)
762  * - data counts include speculative execution (except L1 write, dtlb, bpu)
763  * - remote node access includes remote memory, remote cache, remote mmio.
764  * - prefetches are not included in the counts because they are not
765  *   reliably counted.
766  */
767 
768 #define HSW_DEMAND_DATA_RD              BIT_ULL(0)
769 #define HSW_DEMAND_RFO                  BIT_ULL(1)
770 #define HSW_ANY_RESPONSE                BIT_ULL(16)
771 #define HSW_SUPPLIER_NONE               BIT_ULL(17)
772 #define HSW_L3_MISS_LOCAL_DRAM          BIT_ULL(22)
773 #define HSW_L3_MISS_REMOTE_HOP0         BIT_ULL(27)
774 #define HSW_L3_MISS_REMOTE_HOP1         BIT_ULL(28)
775 #define HSW_L3_MISS_REMOTE_HOP2P        BIT_ULL(29)
776 #define HSW_L3_MISS                     (HSW_L3_MISS_LOCAL_DRAM| \
777                                          HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
778                                          HSW_L3_MISS_REMOTE_HOP2P)
779 #define HSW_SNOOP_NONE                  BIT_ULL(31)
780 #define HSW_SNOOP_NOT_NEEDED            BIT_ULL(32)
781 #define HSW_SNOOP_MISS                  BIT_ULL(33)
782 #define HSW_SNOOP_HIT_NO_FWD            BIT_ULL(34)
783 #define HSW_SNOOP_HIT_WITH_FWD          BIT_ULL(35)
784 #define HSW_SNOOP_HITM                  BIT_ULL(36)
785 #define HSW_SNOOP_NON_DRAM              BIT_ULL(37)
786 #define HSW_ANY_SNOOP                   (HSW_SNOOP_NONE| \
787                                          HSW_SNOOP_NOT_NEEDED|HSW_SNOOP_MISS| \
788                                          HSW_SNOOP_HIT_NO_FWD|HSW_SNOOP_HIT_WITH_FWD| \
789                                          HSW_SNOOP_HITM|HSW_SNOOP_NON_DRAM)
790 #define HSW_SNOOP_DRAM                  (HSW_ANY_SNOOP & ~HSW_SNOOP_NON_DRAM)
791 #define HSW_DEMAND_READ                 HSW_DEMAND_DATA_RD
792 #define HSW_DEMAND_WRITE                HSW_DEMAND_RFO
793 #define HSW_L3_MISS_REMOTE              (HSW_L3_MISS_REMOTE_HOP0|\
794                                          HSW_L3_MISS_REMOTE_HOP1|HSW_L3_MISS_REMOTE_HOP2P)
795 #define HSW_LLC_ACCESS                  HSW_ANY_RESPONSE
796 
797 #define BDW_L3_MISS_LOCAL               BIT(26)
798 #define BDW_L3_MISS                     (BDW_L3_MISS_LOCAL| \
799                                          HSW_L3_MISS_REMOTE_HOP0|HSW_L3_MISS_REMOTE_HOP1| \
800                                          HSW_L3_MISS_REMOTE_HOP2P)
801 
802 
803 static __initconst const u64 hsw_hw_cache_event_ids
804                                 [PERF_COUNT_HW_CACHE_MAX]
805                                 [PERF_COUNT_HW_CACHE_OP_MAX]
806                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
807 {
808  [ C(L1D ) ] = {
809         [ C(OP_READ) ] = {
810                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
811                 [ C(RESULT_MISS)   ] = 0x151,   /* L1D.REPLACEMENT */
812         },
813         [ C(OP_WRITE) ] = {
814                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
815                 [ C(RESULT_MISS)   ] = 0x0,
816         },
817         [ C(OP_PREFETCH) ] = {
818                 [ C(RESULT_ACCESS) ] = 0x0,
819                 [ C(RESULT_MISS)   ] = 0x0,
820         },
821  },
822  [ C(L1I ) ] = {
823         [ C(OP_READ) ] = {
824                 [ C(RESULT_ACCESS) ] = 0x0,
825                 [ C(RESULT_MISS)   ] = 0x280,   /* ICACHE.MISSES */
826         },
827         [ C(OP_WRITE) ] = {
828                 [ C(RESULT_ACCESS) ] = -1,
829                 [ C(RESULT_MISS)   ] = -1,
830         },
831         [ C(OP_PREFETCH) ] = {
832                 [ C(RESULT_ACCESS) ] = 0x0,
833                 [ C(RESULT_MISS)   ] = 0x0,
834         },
835  },
836  [ C(LL  ) ] = {
837         [ C(OP_READ) ] = {
838                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
839                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
840         },
841         [ C(OP_WRITE) ] = {
842                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
843                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
844         },
845         [ C(OP_PREFETCH) ] = {
846                 [ C(RESULT_ACCESS) ] = 0x0,
847                 [ C(RESULT_MISS)   ] = 0x0,
848         },
849  },
850  [ C(DTLB) ] = {
851         [ C(OP_READ) ] = {
852                 [ C(RESULT_ACCESS) ] = 0x81d0,  /* MEM_UOPS_RETIRED.ALL_LOADS */
853                 [ C(RESULT_MISS)   ] = 0x108,   /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
854         },
855         [ C(OP_WRITE) ] = {
856                 [ C(RESULT_ACCESS) ] = 0x82d0,  /* MEM_UOPS_RETIRED.ALL_STORES */
857                 [ C(RESULT_MISS)   ] = 0x149,   /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
858         },
859         [ C(OP_PREFETCH) ] = {
860                 [ C(RESULT_ACCESS) ] = 0x0,
861                 [ C(RESULT_MISS)   ] = 0x0,
862         },
863  },
864  [ C(ITLB) ] = {
865         [ C(OP_READ) ] = {
866                 [ C(RESULT_ACCESS) ] = 0x6085,  /* ITLB_MISSES.STLB_HIT */
867                 [ C(RESULT_MISS)   ] = 0x185,   /* ITLB_MISSES.MISS_CAUSES_A_WALK */
868         },
869         [ C(OP_WRITE) ] = {
870                 [ C(RESULT_ACCESS) ] = -1,
871                 [ C(RESULT_MISS)   ] = -1,
872         },
873         [ C(OP_PREFETCH) ] = {
874                 [ C(RESULT_ACCESS) ] = -1,
875                 [ C(RESULT_MISS)   ] = -1,
876         },
877  },
878  [ C(BPU ) ] = {
879         [ C(OP_READ) ] = {
880                 [ C(RESULT_ACCESS) ] = 0xc4,    /* BR_INST_RETIRED.ALL_BRANCHES */
881                 [ C(RESULT_MISS)   ] = 0xc5,    /* BR_MISP_RETIRED.ALL_BRANCHES */
882         },
883         [ C(OP_WRITE) ] = {
884                 [ C(RESULT_ACCESS) ] = -1,
885                 [ C(RESULT_MISS)   ] = -1,
886         },
887         [ C(OP_PREFETCH) ] = {
888                 [ C(RESULT_ACCESS) ] = -1,
889                 [ C(RESULT_MISS)   ] = -1,
890         },
891  },
892  [ C(NODE) ] = {
893         [ C(OP_READ) ] = {
894                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
895                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
896         },
897         [ C(OP_WRITE) ] = {
898                 [ C(RESULT_ACCESS) ] = 0x1b7,   /* OFFCORE_RESPONSE */
899                 [ C(RESULT_MISS)   ] = 0x1b7,   /* OFFCORE_RESPONSE */
900         },
901         [ C(OP_PREFETCH) ] = {
902                 [ C(RESULT_ACCESS) ] = 0x0,
903                 [ C(RESULT_MISS)   ] = 0x0,
904         },
905  },
906 };
907 
908 static __initconst const u64 hsw_hw_cache_extra_regs
909                                 [PERF_COUNT_HW_CACHE_MAX]
910                                 [PERF_COUNT_HW_CACHE_OP_MAX]
911                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
912 {
913  [ C(LL  ) ] = {
914         [ C(OP_READ) ] = {
915                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
916                                        HSW_LLC_ACCESS,
917                 [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
918                                        HSW_L3_MISS|HSW_ANY_SNOOP,
919         },
920         [ C(OP_WRITE) ] = {
921                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
922                                        HSW_LLC_ACCESS,
923                 [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
924                                        HSW_L3_MISS|HSW_ANY_SNOOP,
925         },
926         [ C(OP_PREFETCH) ] = {
927                 [ C(RESULT_ACCESS) ] = 0x0,
928                 [ C(RESULT_MISS)   ] = 0x0,
929         },
930  },
931  [ C(NODE) ] = {
932         [ C(OP_READ) ] = {
933                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_READ|
934                                        HSW_L3_MISS_LOCAL_DRAM|
935                                        HSW_SNOOP_DRAM,
936                 [ C(RESULT_MISS)   ] = HSW_DEMAND_READ|
937                                        HSW_L3_MISS_REMOTE|
938                                        HSW_SNOOP_DRAM,
939         },
940         [ C(OP_WRITE) ] = {
941                 [ C(RESULT_ACCESS) ] = HSW_DEMAND_WRITE|
942                                        HSW_L3_MISS_LOCAL_DRAM|
943                                        HSW_SNOOP_DRAM,
944                 [ C(RESULT_MISS)   ] = HSW_DEMAND_WRITE|
945                                        HSW_L3_MISS_REMOTE|
946                                        HSW_SNOOP_DRAM,
947         },
948         [ C(OP_PREFETCH) ] = {
949                 [ C(RESULT_ACCESS) ] = 0x0,
950                 [ C(RESULT_MISS)   ] = 0x0,
951         },
952  },
953 };
954 
955 static __initconst const u64 westmere_hw_cache_event_ids
956                                 [PERF_COUNT_HW_CACHE_MAX]
957                                 [PERF_COUNT_HW_CACHE_OP_MAX]
958                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
959 {
960  [ C(L1D) ] = {
961         [ C(OP_READ) ] = {
962                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
963                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
964         },
965         [ C(OP_WRITE) ] = {
 966                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
967                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
968         },
969         [ C(OP_PREFETCH) ] = {
970                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
971                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
972         },
973  },
974  [ C(L1I ) ] = {
975         [ C(OP_READ) ] = {
976                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
977                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
978         },
979         [ C(OP_WRITE) ] = {
980                 [ C(RESULT_ACCESS) ] = -1,
981                 [ C(RESULT_MISS)   ] = -1,
982         },
983         [ C(OP_PREFETCH) ] = {
984                 [ C(RESULT_ACCESS) ] = 0x0,
985                 [ C(RESULT_MISS)   ] = 0x0,
986         },
987  },
988  [ C(LL  ) ] = {
989         [ C(OP_READ) ] = {
990                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
991                 [ C(RESULT_ACCESS) ] = 0x01b7,
992                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
993                 [ C(RESULT_MISS)   ] = 0x01b7,
994         },
995         /*
996          * Use RFO, not WRITEBACK, because a write miss would typically occur
997          * on RFO.
998          */
999         [ C(OP_WRITE) ] = {
1000                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1001                 [ C(RESULT_ACCESS) ] = 0x01b7,
1002                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1003                 [ C(RESULT_MISS)   ] = 0x01b7,
1004         },
1005         [ C(OP_PREFETCH) ] = {
1006                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1007                 [ C(RESULT_ACCESS) ] = 0x01b7,
1008                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1009                 [ C(RESULT_MISS)   ] = 0x01b7,
1010         },
1011  },
1012  [ C(DTLB) ] = {
1013         [ C(OP_READ) ] = {
1014                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
1015                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
1016         },
1017         [ C(OP_WRITE) ] = {
1018                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
1019                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
1020         },
1021         [ C(OP_PREFETCH) ] = {
1022                 [ C(RESULT_ACCESS) ] = 0x0,
1023                 [ C(RESULT_MISS)   ] = 0x0,
1024         },
1025  },
1026  [ C(ITLB) ] = {
1027         [ C(OP_READ) ] = {
1028                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
1029                 [ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
1030         },
1031         [ C(OP_WRITE) ] = {
1032                 [ C(RESULT_ACCESS) ] = -1,
1033                 [ C(RESULT_MISS)   ] = -1,
1034         },
1035         [ C(OP_PREFETCH) ] = {
1036                 [ C(RESULT_ACCESS) ] = -1,
1037                 [ C(RESULT_MISS)   ] = -1,
1038         },
1039  },
1040  [ C(BPU ) ] = {
1041         [ C(OP_READ) ] = {
1042                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1043                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
1044         },
1045         [ C(OP_WRITE) ] = {
1046                 [ C(RESULT_ACCESS) ] = -1,
1047                 [ C(RESULT_MISS)   ] = -1,
1048         },
1049         [ C(OP_PREFETCH) ] = {
1050                 [ C(RESULT_ACCESS) ] = -1,
1051                 [ C(RESULT_MISS)   ] = -1,
1052         },
1053  },
1054  [ C(NODE) ] = {
1055         [ C(OP_READ) ] = {
1056                 [ C(RESULT_ACCESS) ] = 0x01b7,
1057                 [ C(RESULT_MISS)   ] = 0x01b7,
1058         },
1059         [ C(OP_WRITE) ] = {
1060                 [ C(RESULT_ACCESS) ] = 0x01b7,
1061                 [ C(RESULT_MISS)   ] = 0x01b7,
1062         },
1063         [ C(OP_PREFETCH) ] = {
1064                 [ C(RESULT_ACCESS) ] = 0x01b7,
1065                 [ C(RESULT_MISS)   ] = 0x01b7,
1066         },
1067  },
1068 };
1069 
1070 /*
1071  * Nehalem/Westmere MSR_OFFCORE_RESPONSE bits;
1072  * See IA32 SDM Vol 3B 30.6.1.3
1073  */
1074 
1075 #define NHM_DMND_DATA_RD        (1 << 0)
1076 #define NHM_DMND_RFO            (1 << 1)
1077 #define NHM_DMND_IFETCH         (1 << 2)
1078 #define NHM_DMND_WB             (1 << 3)
1079 #define NHM_PF_DATA_RD          (1 << 4)
1080 #define NHM_PF_DATA_RFO         (1 << 5)
1081 #define NHM_PF_IFETCH           (1 << 6)
1082 #define NHM_OFFCORE_OTHER       (1 << 7)
1083 #define NHM_UNCORE_HIT          (1 << 8)
1084 #define NHM_OTHER_CORE_HIT_SNP  (1 << 9)
1085 #define NHM_OTHER_CORE_HITM     (1 << 10)
1086                                 /* reserved */
1087 #define NHM_REMOTE_CACHE_FWD    (1 << 12)
1088 #define NHM_REMOTE_DRAM         (1 << 13)
1089 #define NHM_LOCAL_DRAM          (1 << 14)
1090 #define NHM_NON_DRAM            (1 << 15)
1091 
1092 #define NHM_LOCAL               (NHM_LOCAL_DRAM|NHM_REMOTE_CACHE_FWD)
1093 #define NHM_REMOTE              (NHM_REMOTE_DRAM)
1094 
1095 #define NHM_DMND_READ           (NHM_DMND_DATA_RD)
1096 #define NHM_DMND_WRITE          (NHM_DMND_RFO|NHM_DMND_WB)
1097 #define NHM_DMND_PREFETCH       (NHM_PF_DATA_RD|NHM_PF_DATA_RFO)
1098 
1099 #define NHM_L3_HIT      (NHM_UNCORE_HIT|NHM_OTHER_CORE_HIT_SNP|NHM_OTHER_CORE_HITM)
1100 #define NHM_L3_MISS     (NHM_NON_DRAM|NHM_LOCAL_DRAM|NHM_REMOTE_DRAM|NHM_REMOTE_CACHE_FWD)
1101 #define NHM_L3_ACCESS   (NHM_L3_HIT|NHM_L3_MISS)
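     /*
      * Example: the LL/OP_READ/RESULT_ACCESS entry below programs
      * NHM_DMND_READ|NHM_L3_ACCESS into MSR_OFFCORE_RESPONSE when the
      * generic last-level-cache read-access event (raw 0x01b7) is used.
      */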
1102 
1103 static __initconst const u64 nehalem_hw_cache_extra_regs
1104                                 [PERF_COUNT_HW_CACHE_MAX]
1105                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1106                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1107 {
1108  [ C(LL  ) ] = {
1109         [ C(OP_READ) ] = {
1110                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_L3_ACCESS,
1111                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_L3_MISS,
1112         },
1113         [ C(OP_WRITE) ] = {
1114                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_L3_ACCESS,
1115                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_L3_MISS,
1116         },
1117         [ C(OP_PREFETCH) ] = {
1118                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_L3_ACCESS,
1119                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_L3_MISS,
1120         },
1121  },
1122  [ C(NODE) ] = {
1123         [ C(OP_READ) ] = {
1124                 [ C(RESULT_ACCESS) ] = NHM_DMND_READ|NHM_LOCAL|NHM_REMOTE,
1125                 [ C(RESULT_MISS)   ] = NHM_DMND_READ|NHM_REMOTE,
1126         },
1127         [ C(OP_WRITE) ] = {
1128                 [ C(RESULT_ACCESS) ] = NHM_DMND_WRITE|NHM_LOCAL|NHM_REMOTE,
1129                 [ C(RESULT_MISS)   ] = NHM_DMND_WRITE|NHM_REMOTE,
1130         },
1131         [ C(OP_PREFETCH) ] = {
1132                 [ C(RESULT_ACCESS) ] = NHM_DMND_PREFETCH|NHM_LOCAL|NHM_REMOTE,
1133                 [ C(RESULT_MISS)   ] = NHM_DMND_PREFETCH|NHM_REMOTE,
1134         },
1135  },
1136 };
1137 
1138 static __initconst const u64 nehalem_hw_cache_event_ids
1139                                 [PERF_COUNT_HW_CACHE_MAX]
1140                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1141                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1142 {
1143  [ C(L1D) ] = {
1144         [ C(OP_READ) ] = {
1145                 [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
1146                 [ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
1147         },
1148         [ C(OP_WRITE) ] = {
1149                 [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
1150                 [ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
1151         },
1152         [ C(OP_PREFETCH) ] = {
1153                 [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
1154                 [ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
1155         },
1156  },
1157  [ C(L1I ) ] = {
1158         [ C(OP_READ) ] = {
1159                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
1160                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
1161         },
1162         [ C(OP_WRITE) ] = {
1163                 [ C(RESULT_ACCESS) ] = -1,
1164                 [ C(RESULT_MISS)   ] = -1,
1165         },
1166         [ C(OP_PREFETCH) ] = {
1167                 [ C(RESULT_ACCESS) ] = 0x0,
1168                 [ C(RESULT_MISS)   ] = 0x0,
1169         },
1170  },
1171  [ C(LL  ) ] = {
1172         [ C(OP_READ) ] = {
1173                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
1174                 [ C(RESULT_ACCESS) ] = 0x01b7,
1175                 /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */
1176                 [ C(RESULT_MISS)   ] = 0x01b7,
1177         },
1178         /*
1179          * Use RFO, not WRITEBACK, because a write miss would typically occur
1180          * on RFO.
1181          */
1182         [ C(OP_WRITE) ] = {
1183                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1184                 [ C(RESULT_ACCESS) ] = 0x01b7,
1185                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1186                 [ C(RESULT_MISS)   ] = 0x01b7,
1187         },
1188         [ C(OP_PREFETCH) ] = {
1189                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1190                 [ C(RESULT_ACCESS) ] = 0x01b7,
1191                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1192                 [ C(RESULT_MISS)   ] = 0x01b7,
1193         },
1194  },
1195  [ C(DTLB) ] = {
1196         [ C(OP_READ) ] = {
1197                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI   (alias)  */
1198                 [ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
1199         },
1200         [ C(OP_WRITE) ] = {
1201                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI   (alias)  */
1202                 [ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
1203         },
1204         [ C(OP_PREFETCH) ] = {
1205                 [ C(RESULT_ACCESS) ] = 0x0,
1206                 [ C(RESULT_MISS)   ] = 0x0,
1207         },
1208  },
1209  [ C(ITLB) ] = {
1210         [ C(OP_READ) ] = {
1211                 [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
1212                 [ C(RESULT_MISS)   ] = 0x20c8, /* ITLB_MISS_RETIRED            */
1213         },
1214         [ C(OP_WRITE) ] = {
1215                 [ C(RESULT_ACCESS) ] = -1,
1216                 [ C(RESULT_MISS)   ] = -1,
1217         },
1218         [ C(OP_PREFETCH) ] = {
1219                 [ C(RESULT_ACCESS) ] = -1,
1220                 [ C(RESULT_MISS)   ] = -1,
1221         },
1222  },
1223  [ C(BPU ) ] = {
1224         [ C(OP_READ) ] = {
1225                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
1226                 [ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
1227         },
1228         [ C(OP_WRITE) ] = {
1229                 [ C(RESULT_ACCESS) ] = -1,
1230                 [ C(RESULT_MISS)   ] = -1,
1231         },
1232         [ C(OP_PREFETCH) ] = {
1233                 [ C(RESULT_ACCESS) ] = -1,
1234                 [ C(RESULT_MISS)   ] = -1,
1235         },
1236  },
1237  [ C(NODE) ] = {
1238         [ C(OP_READ) ] = {
1239                 [ C(RESULT_ACCESS) ] = 0x01b7,
1240                 [ C(RESULT_MISS)   ] = 0x01b7,
1241         },
1242         [ C(OP_WRITE) ] = {
1243                 [ C(RESULT_ACCESS) ] = 0x01b7,
1244                 [ C(RESULT_MISS)   ] = 0x01b7,
1245         },
1246         [ C(OP_PREFETCH) ] = {
1247                 [ C(RESULT_ACCESS) ] = 0x01b7,
1248                 [ C(RESULT_MISS)   ] = 0x01b7,
1249         },
1250  },
1251 };
1252 
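     /*
      * A note on these *_hw_cache_event_ids tables: they are indexed by
      * [cache][operation][result] and hold raw event encodings whose low
      * byte is the event select and next byte the unit mask, so e.g.
      * 0x0f40 is event 0x40 with umask 0x0f.  Roughly speaking, a 0 entry
      * means no suitable event is available and -1 means the combination
      * is not supported at all.
      */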
1253 static __initconst const u64 core2_hw_cache_event_ids
1254                                 [PERF_COUNT_HW_CACHE_MAX]
1255                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1256                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1257 {
1258  [ C(L1D) ] = {
1259         [ C(OP_READ) ] = {
1260                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI          */
1261                 [ C(RESULT_MISS)   ] = 0x0140, /* L1D_CACHE_LD.I_STATE       */
1262         },
1263         [ C(OP_WRITE) ] = {
1264                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI          */
1265                 [ C(RESULT_MISS)   ] = 0x0141, /* L1D_CACHE_ST.I_STATE       */
1266         },
1267         [ C(OP_PREFETCH) ] = {
1268                 [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS      */
1269                 [ C(RESULT_MISS)   ] = 0,
1270         },
1271  },
1272  [ C(L1I ) ] = {
1273         [ C(OP_READ) ] = {
1274                 [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS                  */
1275                 [ C(RESULT_MISS)   ] = 0x0081, /* L1I.MISSES                 */
1276         },
1277         [ C(OP_WRITE) ] = {
1278                 [ C(RESULT_ACCESS) ] = -1,
1279                 [ C(RESULT_MISS)   ] = -1,
1280         },
1281         [ C(OP_PREFETCH) ] = {
1282                 [ C(RESULT_ACCESS) ] = 0,
1283                 [ C(RESULT_MISS)   ] = 0,
1284         },
1285  },
1286  [ C(LL  ) ] = {
1287         [ C(OP_READ) ] = {
1288                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
1289                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
1290         },
1291         [ C(OP_WRITE) ] = {
1292                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
1293                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
1294         },
1295         [ C(OP_PREFETCH) ] = {
1296                 [ C(RESULT_ACCESS) ] = 0,
1297                 [ C(RESULT_MISS)   ] = 0,
1298         },
1299  },
1300  [ C(DTLB) ] = {
1301         [ C(OP_READ) ] = {
1302                 [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI  (alias) */
1303                 [ C(RESULT_MISS)   ] = 0x0208, /* DTLB_MISSES.MISS_LD        */
1304         },
1305         [ C(OP_WRITE) ] = {
1306                 [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI  (alias) */
1307                 [ C(RESULT_MISS)   ] = 0x0808, /* DTLB_MISSES.MISS_ST        */
1308         },
1309         [ C(OP_PREFETCH) ] = {
1310                 [ C(RESULT_ACCESS) ] = 0,
1311                 [ C(RESULT_MISS)   ] = 0,
1312         },
1313  },
1314  [ C(ITLB) ] = {
1315         [ C(OP_READ) ] = {
1316                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
1317                 [ C(RESULT_MISS)   ] = 0x1282, /* ITLBMISSES                 */
1318         },
1319         [ C(OP_WRITE) ] = {
1320                 [ C(RESULT_ACCESS) ] = -1,
1321                 [ C(RESULT_MISS)   ] = -1,
1322         },
1323         [ C(OP_PREFETCH) ] = {
1324                 [ C(RESULT_ACCESS) ] = -1,
1325                 [ C(RESULT_MISS)   ] = -1,
1326         },
1327  },
1328  [ C(BPU ) ] = {
1329         [ C(OP_READ) ] = {
1330                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
1331                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
1332         },
1333         [ C(OP_WRITE) ] = {
1334                 [ C(RESULT_ACCESS) ] = -1,
1335                 [ C(RESULT_MISS)   ] = -1,
1336         },
1337         [ C(OP_PREFETCH) ] = {
1338                 [ C(RESULT_ACCESS) ] = -1,
1339                 [ C(RESULT_MISS)   ] = -1,
1340         },
1341  },
1342 };
1343 
1344 static __initconst const u64 atom_hw_cache_event_ids
1345                                 [PERF_COUNT_HW_CACHE_MAX]
1346                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1347                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1348 {
1349  [ C(L1D) ] = {
1350         [ C(OP_READ) ] = {
1351                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD               */
1352                 [ C(RESULT_MISS)   ] = 0,
1353         },
1354         [ C(OP_WRITE) ] = {
1355                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST               */
1356                 [ C(RESULT_MISS)   ] = 0,
1357         },
1358         [ C(OP_PREFETCH) ] = {
1359                 [ C(RESULT_ACCESS) ] = 0x0,
1360                 [ C(RESULT_MISS)   ] = 0,
1361         },
1362  },
1363  [ C(L1I ) ] = {
1364         [ C(OP_READ) ] = {
1365                 [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                  */
1366                 [ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                 */
1367         },
1368         [ C(OP_WRITE) ] = {
1369                 [ C(RESULT_ACCESS) ] = -1,
1370                 [ C(RESULT_MISS)   ] = -1,
1371         },
1372         [ C(OP_PREFETCH) ] = {
1373                 [ C(RESULT_ACCESS) ] = 0,
1374                 [ C(RESULT_MISS)   ] = 0,
1375         },
1376  },
1377  [ C(LL  ) ] = {
1378         [ C(OP_READ) ] = {
1379                 [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI                 */
1380                 [ C(RESULT_MISS)   ] = 0x4129, /* L2_LD.ISTATE               */
1381         },
1382         [ C(OP_WRITE) ] = {
1383                 [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI                 */
1384                 [ C(RESULT_MISS)   ] = 0x412A, /* L2_ST.ISTATE               */
1385         },
1386         [ C(OP_PREFETCH) ] = {
1387                 [ C(RESULT_ACCESS) ] = 0,
1388                 [ C(RESULT_MISS)   ] = 0,
1389         },
1390  },
1391  [ C(DTLB) ] = {
1392         [ C(OP_READ) ] = {
1393                 [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI  (alias) */
1394                 [ C(RESULT_MISS)   ] = 0x0508, /* DTLB_MISSES.MISS_LD        */
1395         },
1396         [ C(OP_WRITE) ] = {
1397                 [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI  (alias) */
1398                 [ C(RESULT_MISS)   ] = 0x0608, /* DTLB_MISSES.MISS_ST        */
1399         },
1400         [ C(OP_PREFETCH) ] = {
1401                 [ C(RESULT_ACCESS) ] = 0,
1402                 [ C(RESULT_MISS)   ] = 0,
1403         },
1404  },
1405  [ C(ITLB) ] = {
1406         [ C(OP_READ) ] = {
1407                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P         */
1408                 [ C(RESULT_MISS)   ] = 0x0282, /* ITLB.MISSES                */
1409         },
1410         [ C(OP_WRITE) ] = {
1411                 [ C(RESULT_ACCESS) ] = -1,
1412                 [ C(RESULT_MISS)   ] = -1,
1413         },
1414         [ C(OP_PREFETCH) ] = {
1415                 [ C(RESULT_ACCESS) ] = -1,
1416                 [ C(RESULT_MISS)   ] = -1,
1417         },
1418  },
1419  [ C(BPU ) ] = {
1420         [ C(OP_READ) ] = {
1421                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY        */
1422                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED    */
1423         },
1424         [ C(OP_WRITE) ] = {
1425                 [ C(RESULT_ACCESS) ] = -1,
1426                 [ C(RESULT_MISS)   ] = -1,
1427         },
1428         [ C(OP_PREFETCH) ] = {
1429                 [ C(RESULT_ACCESS) ] = -1,
1430                 [ C(RESULT_MISS)   ] = -1,
1431         },
1432  },
1433 };
1434 
1435 EVENT_ATTR_STR(topdown-total-slots, td_total_slots_slm, "event=0x3c");
1436 EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_slm, "2");
1437 /* no_alloc_cycles.not_delivered */
1438 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_slm,
1439                "event=0xca,umask=0x50");
1440 EVENT_ATTR_STR(topdown-fetch-bubbles.scale, td_fetch_bubbles_scale_slm, "2");
1441 /* uops_retired.all */
1442 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_slm,
1443                "event=0xc2,umask=0x10");
1444 /* uops_retired.all */
1445 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_slm,
1446                "event=0xc2,umask=0x10");
1447 
1448 static struct attribute *slm_events_attrs[] = {
1449         EVENT_PTR(td_total_slots_slm),
1450         EVENT_PTR(td_total_slots_scale_slm),
1451         EVENT_PTR(td_fetch_bubbles_slm),
1452         EVENT_PTR(td_fetch_bubbles_scale_slm),
1453         EVENT_PTR(td_slots_issued_slm),
1454         EVENT_PTR(td_slots_retired_slm),
1455         NULL
1456 };
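     /*
      * The EVENT_ATTR_STR() aliases above are exported through sysfs
      * (typically /sys/bus/event_source/devices/cpu/events/), so tooling
      * can use the names directly, roughly along the lines of:
      *
      *     perf stat -e topdown-total-slots -e topdown-slots-retired ...
      *
      * (an illustrative invocation, not an exact recipe).
      */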
1457 
1458 static struct extra_reg intel_slm_extra_regs[] __read_mostly =
1459 {
1460         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1461         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
1462         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x368005ffffull, RSP_1),
1463         EVENT_EXTRA_END
1464 };
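     /*
      * The third INTEL_UEVENT_EXTRA_REG() argument is the valid_mask of
      * OFFCORE_RESPONSE bits accepted on that MSR; intel_alt_er() further
      * below consults it when deciding whether an event may be steered to
      * the sibling MSR.
      */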
1465 
1466 #define SLM_DMND_READ           SNB_DMND_DATA_RD
1467 #define SLM_DMND_WRITE          SNB_DMND_RFO
1468 #define SLM_DMND_PREFETCH       (SNB_PF_DATA_RD|SNB_PF_RFO)
1469 
1470 #define SLM_SNP_ANY             (SNB_SNP_NONE|SNB_SNP_MISS|SNB_NO_FWD|SNB_HITM)
1471 #define SLM_LLC_ACCESS          SNB_RESP_ANY
1472 #define SLM_LLC_MISS            (SLM_SNP_ANY|SNB_NON_DRAM)
1473 
1474 static __initconst const u64 slm_hw_cache_extra_regs
1475                                 [PERF_COUNT_HW_CACHE_MAX]
1476                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1477                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1478 {
1479  [ C(LL  ) ] = {
1480         [ C(OP_READ) ] = {
1481                 [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS,
1482                 [ C(RESULT_MISS)   ] = 0,
1483         },
1484         [ C(OP_WRITE) ] = {
1485                 [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS,
1486                 [ C(RESULT_MISS)   ] = SLM_DMND_WRITE|SLM_LLC_MISS,
1487         },
1488         [ C(OP_PREFETCH) ] = {
1489                 [ C(RESULT_ACCESS) ] = SLM_DMND_PREFETCH|SLM_LLC_ACCESS,
1490                 [ C(RESULT_MISS)   ] = SLM_DMND_PREFETCH|SLM_LLC_MISS,
1491         },
1492  },
1493 };
1494 
1495 static __initconst const u64 slm_hw_cache_event_ids
1496                                 [PERF_COUNT_HW_CACHE_MAX]
1497                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1498                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] =
1499 {
1500  [ C(L1D) ] = {
1501         [ C(OP_READ) ] = {
1502                 [ C(RESULT_ACCESS) ] = 0,
1503                 [ C(RESULT_MISS)   ] = 0x0104, /* LD_DCU_MISS */
1504         },
1505         [ C(OP_WRITE) ] = {
1506                 [ C(RESULT_ACCESS) ] = 0,
1507                 [ C(RESULT_MISS)   ] = 0,
1508         },
1509         [ C(OP_PREFETCH) ] = {
1510                 [ C(RESULT_ACCESS) ] = 0,
1511                 [ C(RESULT_MISS)   ] = 0,
1512         },
1513  },
1514  [ C(L1I ) ] = {
1515         [ C(OP_READ) ] = {
1516                 [ C(RESULT_ACCESS) ] = 0x0380, /* ICACHE.ACCESSES */
1517                 [ C(RESULT_MISS)   ] = 0x0280, /* ICACHE.MISSES */
1518         },
1519         [ C(OP_WRITE) ] = {
1520                 [ C(RESULT_ACCESS) ] = -1,
1521                 [ C(RESULT_MISS)   ] = -1,
1522         },
1523         [ C(OP_PREFETCH) ] = {
1524                 [ C(RESULT_ACCESS) ] = 0,
1525                 [ C(RESULT_MISS)   ] = 0,
1526         },
1527  },
1528  [ C(LL  ) ] = {
1529         [ C(OP_READ) ] = {
1530                 /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */
1531                 [ C(RESULT_ACCESS) ] = 0x01b7,
1532                 [ C(RESULT_MISS)   ] = 0,
1533         },
1534         [ C(OP_WRITE) ] = {
1535                 /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */
1536                 [ C(RESULT_ACCESS) ] = 0x01b7,
1537                 /* OFFCORE_RESPONSE.ANY_RFO.ANY_LLC_MISS */
1538                 [ C(RESULT_MISS)   ] = 0x01b7,
1539         },
1540         [ C(OP_PREFETCH) ] = {
1541                 /* OFFCORE_RESPONSE.PREFETCH.LOCAL_CACHE */
1542                 [ C(RESULT_ACCESS) ] = 0x01b7,
1543                 /* OFFCORE_RESPONSE.PREFETCH.ANY_LLC_MISS */
1544                 [ C(RESULT_MISS)   ] = 0x01b7,
1545         },
1546  },
1547  [ C(DTLB) ] = {
1548         [ C(OP_READ) ] = {
1549                 [ C(RESULT_ACCESS) ] = 0,
1550                 [ C(RESULT_MISS)   ] = 0x0804, /* LD_DTLB_MISS */
1551         },
1552         [ C(OP_WRITE) ] = {
1553                 [ C(RESULT_ACCESS) ] = 0,
1554                 [ C(RESULT_MISS)   ] = 0,
1555         },
1556         [ C(OP_PREFETCH) ] = {
1557                 [ C(RESULT_ACCESS) ] = 0,
1558                 [ C(RESULT_MISS)   ] = 0,
1559         },
1560  },
1561  [ C(ITLB) ] = {
1562         [ C(OP_READ) ] = {
1563                 [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */
1564                 [ C(RESULT_MISS)   ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */
1565         },
1566         [ C(OP_WRITE) ] = {
1567                 [ C(RESULT_ACCESS) ] = -1,
1568                 [ C(RESULT_MISS)   ] = -1,
1569         },
1570         [ C(OP_PREFETCH) ] = {
1571                 [ C(RESULT_ACCESS) ] = -1,
1572                 [ C(RESULT_MISS)   ] = -1,
1573         },
1574  },
1575  [ C(BPU ) ] = {
1576         [ C(OP_READ) ] = {
1577                 [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */
1578                 [ C(RESULT_MISS)   ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */
1579         },
1580         [ C(OP_WRITE) ] = {
1581                 [ C(RESULT_ACCESS) ] = -1,
1582                 [ C(RESULT_MISS)   ] = -1,
1583         },
1584         [ C(OP_PREFETCH) ] = {
1585                 [ C(RESULT_ACCESS) ] = -1,
1586                 [ C(RESULT_MISS)   ] = -1,
1587         },
1588  },
1589 };
1590 
1591 EVENT_ATTR_STR(topdown-total-slots, td_total_slots_glm, "event=0x3c");
1592 EVENT_ATTR_STR(topdown-total-slots.scale, td_total_slots_scale_glm, "3");
1593 /* UOPS_NOT_DELIVERED.ANY */
1594 EVENT_ATTR_STR(topdown-fetch-bubbles, td_fetch_bubbles_glm, "event=0x9c");
1595 /* ISSUE_SLOTS_NOT_CONSUMED.RECOVERY */
1596 EVENT_ATTR_STR(topdown-recovery-bubbles, td_recovery_bubbles_glm, "event=0xca,umask=0x02");
1597 /* UOPS_RETIRED.ANY */
1598 EVENT_ATTR_STR(topdown-slots-retired, td_slots_retired_glm, "event=0xc2");
1599 /* UOPS_ISSUED.ANY */
1600 EVENT_ATTR_STR(topdown-slots-issued, td_slots_issued_glm, "event=0x0e");
1601 
1602 static struct attribute *glm_events_attrs[] = {
1603         EVENT_PTR(td_total_slots_glm),
1604         EVENT_PTR(td_total_slots_scale_glm),
1605         EVENT_PTR(td_fetch_bubbles_glm),
1606         EVENT_PTR(td_recovery_bubbles_glm),
1607         EVENT_PTR(td_slots_issued_glm),
1608         EVENT_PTR(td_slots_retired_glm),
1609         NULL
1610 };
1611 
1612 static struct extra_reg intel_glm_extra_regs[] __read_mostly = {
1613         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1614         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x760005ffbfull, RSP_0),
1615         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x360005ffbfull, RSP_1),
1616         EVENT_EXTRA_END
1617 };
1618 
1619 #define GLM_DEMAND_DATA_RD              BIT_ULL(0)
1620 #define GLM_DEMAND_RFO                  BIT_ULL(1)
1621 #define GLM_ANY_RESPONSE                BIT_ULL(16)
1622 #define GLM_SNP_NONE_OR_MISS            BIT_ULL(33)
1623 #define GLM_DEMAND_READ                 GLM_DEMAND_DATA_RD
1624 #define GLM_DEMAND_WRITE                GLM_DEMAND_RFO
1625 #define GLM_DEMAND_PREFETCH             (SNB_PF_DATA_RD|SNB_PF_RFO)
1626 #define GLM_LLC_ACCESS                  GLM_ANY_RESPONSE
1627 #define GLM_SNP_ANY                     (GLM_SNP_NONE_OR_MISS|SNB_NO_FWD|SNB_HITM)
1628 #define GLM_LLC_MISS                    (GLM_SNP_ANY|SNB_NON_DRAM)
1629 
1630 static __initconst const u64 glm_hw_cache_event_ids
1631                                 [PERF_COUNT_HW_CACHE_MAX]
1632                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1633                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1634         [C(L1D)] = {
1635                 [C(OP_READ)] = {
1636                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1637                         [C(RESULT_MISS)]        = 0x0,
1638                 },
1639                 [C(OP_WRITE)] = {
1640                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1641                         [C(RESULT_MISS)]        = 0x0,
1642                 },
1643                 [C(OP_PREFETCH)] = {
1644                         [C(RESULT_ACCESS)]      = 0x0,
1645                         [C(RESULT_MISS)]        = 0x0,
1646                 },
1647         },
1648         [C(L1I)] = {
1649                 [C(OP_READ)] = {
1650                         [C(RESULT_ACCESS)]      = 0x0380,       /* ICACHE.ACCESSES */
1651                         [C(RESULT_MISS)]        = 0x0280,       /* ICACHE.MISSES */
1652                 },
1653                 [C(OP_WRITE)] = {
1654                         [C(RESULT_ACCESS)]      = -1,
1655                         [C(RESULT_MISS)]        = -1,
1656                 },
1657                 [C(OP_PREFETCH)] = {
1658                         [C(RESULT_ACCESS)]      = 0x0,
1659                         [C(RESULT_MISS)]        = 0x0,
1660                 },
1661         },
1662         [C(LL)] = {
1663                 [C(OP_READ)] = {
1664                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1665                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1666                 },
1667                 [C(OP_WRITE)] = {
1668                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1669                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1670                 },
1671                 [C(OP_PREFETCH)] = {
1672                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1673                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1674                 },
1675         },
1676         [C(DTLB)] = {
1677                 [C(OP_READ)] = {
1678                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1679                         [C(RESULT_MISS)]        = 0x0,
1680                 },
1681                 [C(OP_WRITE)] = {
1682                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1683                         [C(RESULT_MISS)]        = 0x0,
1684                 },
1685                 [C(OP_PREFETCH)] = {
1686                         [C(RESULT_ACCESS)]      = 0x0,
1687                         [C(RESULT_MISS)]        = 0x0,
1688                 },
1689         },
1690         [C(ITLB)] = {
1691                 [C(OP_READ)] = {
1692                         [C(RESULT_ACCESS)]      = 0x00c0,       /* INST_RETIRED.ANY_P */
1693                         [C(RESULT_MISS)]        = 0x0481,       /* ITLB.MISS */
1694                 },
1695                 [C(OP_WRITE)] = {
1696                         [C(RESULT_ACCESS)]      = -1,
1697                         [C(RESULT_MISS)]        = -1,
1698                 },
1699                 [C(OP_PREFETCH)] = {
1700                         [C(RESULT_ACCESS)]      = -1,
1701                         [C(RESULT_MISS)]        = -1,
1702                 },
1703         },
1704         [C(BPU)] = {
1705                 [C(OP_READ)] = {
1706                         [C(RESULT_ACCESS)]      = 0x00c4,       /* BR_INST_RETIRED.ALL_BRANCHES */
1707                         [C(RESULT_MISS)]        = 0x00c5,       /* BR_MISP_RETIRED.ALL_BRANCHES */
1708                 },
1709                 [C(OP_WRITE)] = {
1710                         [C(RESULT_ACCESS)]      = -1,
1711                         [C(RESULT_MISS)]        = -1,
1712                 },
1713                 [C(OP_PREFETCH)] = {
1714                         [C(RESULT_ACCESS)]      = -1,
1715                         [C(RESULT_MISS)]        = -1,
1716                 },
1717         },
1718 };
1719 
1720 static __initconst const u64 glm_hw_cache_extra_regs
1721                                 [PERF_COUNT_HW_CACHE_MAX]
1722                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1723                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1724         [C(LL)] = {
1725                 [C(OP_READ)] = {
1726                         [C(RESULT_ACCESS)]      = GLM_DEMAND_READ|
1727                                                   GLM_LLC_ACCESS,
1728                         [C(RESULT_MISS)]        = GLM_DEMAND_READ|
1729                                                   GLM_LLC_MISS,
1730                 },
1731                 [C(OP_WRITE)] = {
1732                         [C(RESULT_ACCESS)]      = GLM_DEMAND_WRITE|
1733                                                   GLM_LLC_ACCESS,
1734                         [C(RESULT_MISS)]        = GLM_DEMAND_WRITE|
1735                                                   GLM_LLC_MISS,
1736                 },
1737                 [C(OP_PREFETCH)] = {
1738                         [C(RESULT_ACCESS)]      = GLM_DEMAND_PREFETCH|
1739                                                   GLM_LLC_ACCESS,
1740                         [C(RESULT_MISS)]        = GLM_DEMAND_PREFETCH|
1741                                                   GLM_LLC_MISS,
1742                 },
1743         },
1744 };
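     /*
      * Worked example from the bit definitions above: the LL read-access
      * entry just above is GLM_DEMAND_READ | GLM_LLC_ACCESS, i.e.
      * BIT(0) | BIT(16) = 0x10001, which is the OFFCORE_RESPONSE value
      * programmed into the MSR_OFFCORE_RSP_x extra register for that
      * cache event; the miss encodings additionally OR in the snoop and
      * non-DRAM bits that make up GLM_LLC_MISS.
      */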
1745 
1746 static __initconst const u64 glp_hw_cache_event_ids
1747                                 [PERF_COUNT_HW_CACHE_MAX]
1748                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1749                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1750         [C(L1D)] = {
1751                 [C(OP_READ)] = {
1752                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1753                         [C(RESULT_MISS)]        = 0x0,
1754                 },
1755                 [C(OP_WRITE)] = {
1756                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1757                         [C(RESULT_MISS)]        = 0x0,
1758                 },
1759                 [C(OP_PREFETCH)] = {
1760                         [C(RESULT_ACCESS)]      = 0x0,
1761                         [C(RESULT_MISS)]        = 0x0,
1762                 },
1763         },
1764         [C(L1I)] = {
1765                 [C(OP_READ)] = {
1766                         [C(RESULT_ACCESS)]      = 0x0380,       /* ICACHE.ACCESSES */
1767                         [C(RESULT_MISS)]        = 0x0280,       /* ICACHE.MISSES */
1768                 },
1769                 [C(OP_WRITE)] = {
1770                         [C(RESULT_ACCESS)]      = -1,
1771                         [C(RESULT_MISS)]        = -1,
1772                 },
1773                 [C(OP_PREFETCH)] = {
1774                         [C(RESULT_ACCESS)]      = 0x0,
1775                         [C(RESULT_MISS)]        = 0x0,
1776                 },
1777         },
1778         [C(LL)] = {
1779                 [C(OP_READ)] = {
1780                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1781                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1782                 },
1783                 [C(OP_WRITE)] = {
1784                         [C(RESULT_ACCESS)]      = 0x1b7,        /* OFFCORE_RESPONSE */
1785                         [C(RESULT_MISS)]        = 0x1b7,        /* OFFCORE_RESPONSE */
1786                 },
1787                 [C(OP_PREFETCH)] = {
1788                         [C(RESULT_ACCESS)]      = 0x0,
1789                         [C(RESULT_MISS)]        = 0x0,
1790                 },
1791         },
1792         [C(DTLB)] = {
1793                 [C(OP_READ)] = {
1794                         [C(RESULT_ACCESS)]      = 0x81d0,       /* MEM_UOPS_RETIRED.ALL_LOADS */
1795                         [C(RESULT_MISS)]        = 0xe08,        /* DTLB_LOAD_MISSES.WALK_COMPLETED */
1796                 },
1797                 [C(OP_WRITE)] = {
1798                         [C(RESULT_ACCESS)]      = 0x82d0,       /* MEM_UOPS_RETIRED.ALL_STORES */
1799                         [C(RESULT_MISS)]        = 0xe49,        /* DTLB_STORE_MISSES.WALK_COMPLETED */
1800                 },
1801                 [C(OP_PREFETCH)] = {
1802                         [C(RESULT_ACCESS)]      = 0x0,
1803                         [C(RESULT_MISS)]        = 0x0,
1804                 },
1805         },
1806         [C(ITLB)] = {
1807                 [C(OP_READ)] = {
1808                         [C(RESULT_ACCESS)]      = 0x00c0,       /* INST_RETIRED.ANY_P */
1809                         [C(RESULT_MISS)]        = 0x0481,       /* ITLB.MISS */
1810                 },
1811                 [C(OP_WRITE)] = {
1812                         [C(RESULT_ACCESS)]      = -1,
1813                         [C(RESULT_MISS)]        = -1,
1814                 },
1815                 [C(OP_PREFETCH)] = {
1816                         [C(RESULT_ACCESS)]      = -1,
1817                         [C(RESULT_MISS)]        = -1,
1818                 },
1819         },
1820         [C(BPU)] = {
1821                 [C(OP_READ)] = {
1822                         [C(RESULT_ACCESS)]      = 0x00c4,       /* BR_INST_RETIRED.ALL_BRANCHES */
1823                         [C(RESULT_MISS)]        = 0x00c5,       /* BR_MISP_RETIRED.ALL_BRANCHES */
1824                 },
1825                 [C(OP_WRITE)] = {
1826                         [C(RESULT_ACCESS)]      = -1,
1827                         [C(RESULT_MISS)]        = -1,
1828                 },
1829                 [C(OP_PREFETCH)] = {
1830                         [C(RESULT_ACCESS)]      = -1,
1831                         [C(RESULT_MISS)]        = -1,
1832                 },
1833         },
1834 };
1835 
1836 static __initconst const u64 glp_hw_cache_extra_regs
1837                                 [PERF_COUNT_HW_CACHE_MAX]
1838                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1839                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1840         [C(LL)] = {
1841                 [C(OP_READ)] = {
1842                         [C(RESULT_ACCESS)]      = GLM_DEMAND_READ|
1843                                                   GLM_LLC_ACCESS,
1844                         [C(RESULT_MISS)]        = GLM_DEMAND_READ|
1845                                                   GLM_LLC_MISS,
1846                 },
1847                 [C(OP_WRITE)] = {
1848                         [C(RESULT_ACCESS)]      = GLM_DEMAND_WRITE|
1849                                                   GLM_LLC_ACCESS,
1850                         [C(RESULT_MISS)]        = GLM_DEMAND_WRITE|
1851                                                   GLM_LLC_MISS,
1852                 },
1853                 [C(OP_PREFETCH)] = {
1854                         [C(RESULT_ACCESS)]      = 0x0,
1855                         [C(RESULT_MISS)]        = 0x0,
1856                 },
1857         },
1858 };
1859 
1860 #define TNT_LOCAL_DRAM                  BIT_ULL(26)
1861 #define TNT_DEMAND_READ                 GLM_DEMAND_DATA_RD
1862 #define TNT_DEMAND_WRITE                GLM_DEMAND_RFO
1863 #define TNT_LLC_ACCESS                  GLM_ANY_RESPONSE
1864 #define TNT_SNP_ANY                     (SNB_SNP_NOT_NEEDED|SNB_SNP_MISS| \
1865                                          SNB_NO_FWD|SNB_SNP_FWD|SNB_HITM)
1866 #define TNT_LLC_MISS                    (TNT_SNP_ANY|SNB_NON_DRAM|TNT_LOCAL_DRAM)
1867 
1868 static __initconst const u64 tnt_hw_cache_extra_regs
1869                                 [PERF_COUNT_HW_CACHE_MAX]
1870                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1871                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1872         [C(LL)] = {
1873                 [C(OP_READ)] = {
1874                         [C(RESULT_ACCESS)]      = TNT_DEMAND_READ|
1875                                                   TNT_LLC_ACCESS,
1876                         [C(RESULT_MISS)]        = TNT_DEMAND_READ|
1877                                                   TNT_LLC_MISS,
1878                 },
1879                 [C(OP_WRITE)] = {
1880                         [C(RESULT_ACCESS)]      = TNT_DEMAND_WRITE|
1881                                                   TNT_LLC_ACCESS,
1882                         [C(RESULT_MISS)]        = TNT_DEMAND_WRITE|
1883                                                   TNT_LLC_MISS,
1884                 },
1885                 [C(OP_PREFETCH)] = {
1886                         [C(RESULT_ACCESS)]      = 0x0,
1887                         [C(RESULT_MISS)]        = 0x0,
1888                 },
1889         },
1890 };
1891 
1892 static struct extra_reg intel_tnt_extra_regs[] __read_mostly = {
1893         /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
1894         INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0),
1895         INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1),
1896         EVENT_EXTRA_END
1897 };
1898 
1899 #define KNL_OT_L2_HITE          BIT_ULL(19) /* Other Tile L2 Hit E-state */
1900 #define KNL_OT_L2_HITF          BIT_ULL(20) /* Other Tile L2 Hit F-state */
1901 #define KNL_MCDRAM_LOCAL        BIT_ULL(21)
1902 #define KNL_MCDRAM_FAR          BIT_ULL(22)
1903 #define KNL_DDR_LOCAL           BIT_ULL(23)
1904 #define KNL_DDR_FAR             BIT_ULL(24)
1905 #define KNL_DRAM_ANY            (KNL_MCDRAM_LOCAL | KNL_MCDRAM_FAR | \
1906                                     KNL_DDR_LOCAL | KNL_DDR_FAR)
1907 #define KNL_L2_READ             SLM_DMND_READ
1908 #define KNL_L2_WRITE            SLM_DMND_WRITE
1909 #define KNL_L2_PREFETCH         SLM_DMND_PREFETCH
1910 #define KNL_L2_ACCESS           SLM_LLC_ACCESS
1911 #define KNL_L2_MISS             (KNL_OT_L2_HITE | KNL_OT_L2_HITF | \
1912                                    KNL_DRAM_ANY | SNB_SNP_ANY | \
1913                                                   SNB_NON_DRAM)
1914 
1915 static __initconst const u64 knl_hw_cache_extra_regs
1916                                 [PERF_COUNT_HW_CACHE_MAX]
1917                                 [PERF_COUNT_HW_CACHE_OP_MAX]
1918                                 [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
1919         [C(LL)] = {
1920                 [C(OP_READ)] = {
1921                         [C(RESULT_ACCESS)] = KNL_L2_READ | KNL_L2_ACCESS,
1922                         [C(RESULT_MISS)]   = 0,
1923                 },
1924                 [C(OP_WRITE)] = {
1925                         [C(RESULT_ACCESS)] = KNL_L2_WRITE | KNL_L2_ACCESS,
1926                         [C(RESULT_MISS)]   = KNL_L2_WRITE | KNL_L2_MISS,
1927                 },
1928                 [C(OP_PREFETCH)] = {
1929                         [C(RESULT_ACCESS)] = KNL_L2_PREFETCH | KNL_L2_ACCESS,
1930                         [C(RESULT_MISS)]   = KNL_L2_PREFETCH | KNL_L2_MISS,
1931                 },
1932         },
1933 };
1934 
1935 /*
1936  * Used from PMIs where the LBRs are already disabled.
1937  *
1938  * This function could be called consecutively. It is required to remain in
1939  * disabled state if called consecutively.
1940  *
1941  * During consecutive calls, the same disable value will be written to related
1942  * registers, so the PMU state remains unchanged.
1943  *
1944  * intel_bts events don't coexist with intel PMU's BTS events because of
1945  * x86_add_exclusive(x86_lbr_exclusive_lbr); there's no need to keep them
1946  * disabled around intel PMU's event batching etc, only inside the PMI handler.
1947  */
1948 static void __intel_pmu_disable_all(void)
1949 {
1950         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1951 
1952         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
1953 
1954         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
1955                 intel_pmu_disable_bts();
1956 
1957         intel_pmu_pebs_disable_all();
1958 }
1959 
1960 static void intel_pmu_disable_all(void)
1961 {
1962         __intel_pmu_disable_all();
1963         intel_pmu_lbr_disable_all();
1964 }
1965 
1966 static void __intel_pmu_enable_all(int added, bool pmi)
1967 {
1968         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
1969 
1970         intel_pmu_pebs_enable_all();
1971         intel_pmu_lbr_enable_all(pmi);
1972         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL,
1973                         x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask);
1974 
1975         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
1976                 struct perf_event *event =
1977                         cpuc->events[INTEL_PMC_IDX_FIXED_BTS];
1978 
1979                 if (WARN_ON_ONCE(!event))
1980                         return;
1981 
1982                 intel_pmu_enable_bts(event->hw.config);
1983         }
1984 }
1985 
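     /*
      * 'added' is not used in this generic path; the Nehalem variant
      * further below checks it to decide whether the erratum workaround
      * needs to run.
      */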
1986 static void intel_pmu_enable_all(int added)
1987 {
1988         __intel_pmu_enable_all(added, false);
1989 }
1990 
1991 /*
1992  * Workaround for:
1993  *   Intel Errata AAK100 (model 26)
1994  *   Intel Errata AAP53  (model 30)
1995  *   Intel Errata BD53   (model 44)
1996  *
1997  * The official story:
1998  *   These chips need to be 'reset' when adding counters by programming the
1999  *   magic three (non-counting) events 0x4300B5, 0x4300D2, and 0x4300B1 either
2000  *   in sequence on the same PMC or on different PMCs.
2001  *
2002  * In practice it appears some of these events do in fact count, and
2003  * we need to program all 4 events.
2004  */
2005 static void intel_pmu_nhm_workaround(void)
2006 {
2007         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2008         static const unsigned long nhm_magic[4] = {
2009                 0x4300B5,
2010                 0x4300D2,
2011                 0x4300B1,
2012                 0x4300B1
2013         };
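             /*
              * Decoding sketch (not taken from the erratum text): each magic
              * value is a raw PERFEVTSEL encoding, e.g. 0x4300B5 is event
              * select 0xB5, umask 0x00, with the USR (bit 16), OS (bit 17)
              * and EN (bit 22) bits set.
              */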
2014         struct perf_event *event;
2015         int i;
2016 
2017         /*
2018          * The errata require the following steps:
2019          * 1) Clear MSR_IA32_PEBS_ENABLE and MSR_CORE_PERF_GLOBAL_CTRL;
2020          * 2) Configure 4 PERFEVTSELx with the magic events and clear
2021          *    the corresponding PMCx;
2022          * 3) set bit0~bit3 of MSR_CORE_PERF_GLOBAL_CTRL;
2023          * 4) Clear MSR_CORE_PERF_GLOBAL_CTRL;
2024          * 5) Clear 4 pairs of PERFEVTSELx and PMCx;
2025          */
2026 
2027         /*
2028          * The real steps we choose are a little different from above.
2029          * A) To reduce MSR operations, we don't run step 1) as they
2030          *    are already cleared before this function is called;
2031          * B) Call x86_perf_event_update to save PMCx before configuring
2032          *    PERFEVTSELx with magic number;
2033          * C) For step 5), we only clear a PERFEVTSELx when it is
2034          *    not currently in use;
2035          * D) Call x86_perf_event_set_period to restore PMCx;
2036          */
2037 
2038         /* We always operate on 4 pairs of PERF counters */
2039         for (i = 0; i < 4; i++) {
2040                 event = cpuc->events[i];
2041                 if (event)
2042                         x86_perf_event_update(event);
2043         }
2044 
2045         for (i = 0; i < 4; i++) {
2046                 wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, nhm_magic[i]);
2047                 wrmsrl(MSR_ARCH_PERFMON_PERFCTR0 + i, 0x0);
2048         }
2049 
2050         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0xf);
2051         wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
2052 
2053         for (i = 0; i < 4; i++) {
2054                 event = cpuc->events[i];
2055 
2056                 if (event) {
2057                         x86_perf_event_set_period(event);
2058                         __x86_pmu_enable_event(&event->hw,
2059                                         ARCH_PERFMON_EVENTSEL_ENABLE);
2060                 } else
2061                         wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + i, 0x0);
2062         }
2063 }
2064 
2065 static void intel_pmu_nhm_enable_all(int added)
2066 {
2067         if (added)
2068                 intel_pmu_nhm_workaround();
2069         intel_pmu_enable_all(added);
2070 }
2071 
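     /*
      * Mirror the last value written to MSR_TSX_FORCE_ABORT in
      * cpuc->tfa_shadow so that repeated scheduling passes don't issue
      * redundant WRMSRs.
      */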
2072 static void intel_set_tfa(struct cpu_hw_events *cpuc, bool on)
2073 {
2074         u64 val = on ? MSR_TFA_RTM_FORCE_ABORT : 0;
2075 
2076         if (cpuc->tfa_shadow != val) {
2077                 cpuc->tfa_shadow = val;
2078                 wrmsrl(MSR_TSX_FORCE_ABORT, val);
2079         }
2080 }
2081 
2082 static void intel_tfa_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
2083 {
2084         /*
2085          * We're going to use PMC3, make sure TFA is set before we touch it.
2086          */
2087         if (cntr == 3)
2088                 intel_set_tfa(cpuc, true);
2089 }
2090 
2091 static void intel_tfa_pmu_enable_all(int added)
2092 {
2093         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2094 
2095         /*
2096          * If we find PMC3 is no longer used when we enable the PMU, we can
2097          * clear TFA.
2098          */
2099         if (!test_bit(3, cpuc->active_mask))
2100                 intel_set_tfa(cpuc, false);
2101 
2102         intel_pmu_enable_all(added);
2103 }
2104 
2105 static void enable_counter_freeze(void)
2106 {
2107         update_debugctlmsr(get_debugctlmsr() |
2108                         DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
2109 }
2110 
2111 static void disable_counter_freeze(void)
2112 {
2113         update_debugctlmsr(get_debugctlmsr() &
2114                         ~DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
2115 }
2116 
2117 static inline u64 intel_pmu_get_status(void)
2118 {
2119         u64 status;
2120 
2121         rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
2122 
2123         return status;
2124 }
2125 
2126 static inline void intel_pmu_ack_status(u64 ack)
2127 {
2128         wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
2129 }
2130 
2131 static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
2132 {
2133         int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
2134         u64 ctrl_val, mask;
2135 
2136         mask = 0xfULL << (idx * 4);
2137 
2138         rdmsrl(hwc->config_base, ctrl_val);
2139         ctrl_val &= ~mask;
2140         wrmsrl(hwc->config_base, ctrl_val);
2141 }
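     /*
      * A sketch based on the code here and in intel_pmu_enable_fixed()
      * below: MSR_ARCH_PERFMON_FIXED_CTR_CTRL packs one 4-bit control
      * field per fixed counter, hence the 0xf << (idx * 4) masks.  For
      * example, a non-PEBS event on fixed counter 1 counting both ring 0
      * and ring 3 would program 0x8 | 0x2 | 0x1 = 0xb shifted into
      * bits 4-7.
      */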
2142 
2143 static inline bool event_is_checkpointed(struct perf_event *event)
2144 {
2145         return (event->hw.config & HSW_IN_TX_CHECKPOINTED) != 0;
2146 }
2147 
2148 static void intel_pmu_disable_event(struct perf_event *event)
2149 {
2150         struct hw_perf_event *hwc = &event->hw;
2151         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2152 
2153         if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
2154                 intel_pmu_disable_bts();
2155                 intel_pmu_drain_bts_buffer();
2156                 return;
2157         }
2158 
2159         cpuc->intel_ctrl_guest_mask &= ~(1ull << hwc->idx);
2160         cpuc->intel_ctrl_host_mask &= ~(1ull << hwc->idx);
2161         cpuc->intel_cp_status &= ~(1ull << hwc->idx);
2162 
2163         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL))
2164                 intel_pmu_disable_fixed(hwc);
2165         else
2166                 x86_pmu_disable_event(event);
2167 
2168         /*
2169          * Needs to be called after x86_pmu_disable_event,
2170          * so we don't trigger the event without PEBS bit set.
2171          */
2172         if (unlikely(event->attr.precise_ip))
2173                 intel_pmu_pebs_disable(event);
2174 }
2175 
2176 static void intel_pmu_del_event(struct perf_event *event)
2177 {
2178         if (needs_branch_stack(event))
2179                 intel_pmu_lbr_del(event);
2180         if (event->attr.precise_ip)
2181                 intel_pmu_pebs_del(event);
2182 }
2183 
2184 static void intel_pmu_read_event(struct perf_event *event)
2185 {
2186         if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
2187                 intel_pmu_auto_reload_read(event);
2188         else
2189                 x86_perf_event_update(event);
2190 }
2191 
2192 static void intel_pmu_enable_fixed(struct perf_event *event)
2193 {
2194         struct hw_perf_event *hwc = &event->hw;
2195         int idx = hwc->idx - INTEL_PMC_IDX_FIXED;
2196         u64 ctrl_val, mask, bits = 0;
2197 
2198         /*
2199          * Enable IRQ generation (0x8), if not PEBS,
2200          * and enable ring-3 counting (0x2) and ring-0 counting (0x1)
2201          * if requested:
2202          */
2203         if (!event->attr.precise_ip)
2204                 bits |= 0x8;
2205         if (hwc->config & ARCH_PERFMON_EVENTSEL_USR)
2206                 bits |= 0x2;
2207         if (hwc->config & ARCH_PERFMON_EVENTSEL_OS)
2208                 bits |= 0x1;
2209 
2210         /*
2211          * ANY bit is supported in v3 and up
2212          */
2213         if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY)
2214                 bits |= 0x4;
2215 
2216         bits <<= (idx * 4);
2217         mask = 0xfULL << (idx * 4);
2218 
2219         if (x86_pmu.intel_cap.pebs_baseline && event->attr.precise_ip) {
2220                 bits |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
2221                 mask |= ICL_FIXED_0_ADAPTIVE << (idx * 4);
2222         }
2223 
2224         rdmsrl(hwc->config_base, ctrl_val);
2225         ctrl_val &= ~mask;
2226         ctrl_val |= bits;
2227         wrmsrl(hwc->config_base, ctrl_val);
2228 }
2229 
2230 static void intel_pmu_enable_event(struct perf_event *event)
2231 {
2232         struct hw_perf_event *hwc = &event->hw;
2233         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2234 
2235         if (unlikely(hwc->idx == INTEL_PMC_IDX_FIXED_BTS)) {
2236                 if (!__this_cpu_read(cpu_hw_events.enabled))
2237                         return;
2238 
2239                 intel_pmu_enable_bts(hwc->config);
2240                 return;
2241         }
2242 
2243         if (event->attr.exclude_host)
2244                 cpuc->intel_ctrl_guest_mask |= (1ull << hwc->idx);
2245         if (event->attr.exclude_guest)
2246                 cpuc->intel_ctrl_host_mask |= (1ull << hwc->idx);
2247 
2248         if (unlikely(event_is_checkpointed(event)))
2249                 cpuc->intel_cp_status |= (1ull << hwc->idx);
2250 
2251         if (unlikely(event->attr.precise_ip))
2252                 intel_pmu_pebs_enable(event);
2253 
2254         if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) {
2255                 intel_pmu_enable_fixed(event);
2256                 return;
2257         }
2258 
2259         __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
2260 }
2261 
2262 static void intel_pmu_add_event(struct perf_event *event)
2263 {
2264         if (event->attr.precise_ip)
2265                 intel_pmu_pebs_add(event);
2266         if (needs_branch_stack(event))
2267                 intel_pmu_lbr_add(event);
2268 }
2269 
2270 /*
2271  * Save and restart an expired event. Called by NMI contexts,
2272  * so it has to be careful about preempting normal event ops:
2273  */
2274 int intel_pmu_save_and_restart(struct perf_event *event)
2275 {
2276         x86_perf_event_update(event);
2277         /*
2278          * For a checkpointed counter always reset back to 0.  This
2279          * avoids a situation where the counter overflows, aborts the
2280          * transaction and is then set back to shortly before the
2281          * overflow, and overflows and aborts again.
2282          */
2283         if (unlikely(event_is_checkpointed(event))) {
2284                 /* No race with NMIs because the counter should not be armed */
2285                 wrmsrl(event->hw.event_base, 0);
2286                 local64_set(&event->hw.prev_count, 0);
2287         }
2288         return x86_perf_event_set_period(event);
2289 }
2290 
2291 static void intel_pmu_reset(void)
2292 {
2293         struct debug_store *ds = __this_cpu_read(cpu_hw_events.ds);
2294         unsigned long flags;
2295         int idx;
2296 
2297         if (!x86_pmu.num_counters)
2298                 return;
2299 
2300         local_irq_save(flags);
2301 
2302         pr_info("clearing PMU state on CPU#%d\n", smp_processor_id());
2303 
2304         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
2305                 wrmsrl_safe(x86_pmu_config_addr(idx), 0ull);
2306                 wrmsrl_safe(x86_pmu_event_addr(idx),  0ull);
2307         }
2308         for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++)
2309                 wrmsrl_safe(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull);
2310 
2311         if (ds)
2312                 ds->bts_index = ds->bts_buffer_base;
2313 
2314         /* Ack all overflows and disable fixed counters */
2315         if (x86_pmu.version >= 2) {
2316                 intel_pmu_ack_status(intel_pmu_get_status());
2317                 wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
2318         }
2319 
2320         /* Reset LBRs and LBR freezing */
2321         if (x86_pmu.lbr_nr) {
2322                 update_debugctlmsr(get_debugctlmsr() &
2323                         ~(DEBUGCTLMSR_FREEZE_LBRS_ON_PMI|DEBUGCTLMSR_LBR));
2324         }
2325 
2326         local_irq_restore(flags);
2327 }
2328 
2329 static int handle_pmi_common(struct pt_regs *regs, u64 status)
2330 {
2331         struct perf_sample_data data;
2332         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2333         int bit;
2334         int handled = 0;
2335 
2336         inc_irq_stat(apic_perf_irqs);
2337 
2338         /*
2339          * Ignore a range of extra bits in status that do not indicate
2340          * overflow by themselves.
2341          */
2342         status &= ~(GLOBAL_STATUS_COND_CHG |
2343                     GLOBAL_STATUS_ASIF |
2344                     GLOBAL_STATUS_LBRS_FROZEN);
2345         if (!status)
2346                 return 0;
2347         /*
2348          * In case multiple PEBS events are sampled at the same time,
2349          * it is possible to have GLOBAL_STATUS bit 62 set indicating
2350          * PEBS buffer overflow and also to see at most 3 PEBS counters
2351          * having their bits set in the status register. This is a sign
2352          * that there was at least one PEBS record pending at the time
2353          * of the PMU interrupt. PEBS counters must only be processed
2354          * via the drain_pebs() calls and not via the regular sample
2355          * processing loop that comes later in this function, otherwise
2356          * phony regular samples may be generated in the sampling buffer
2357          * not marked with the EXACT tag. Another possibility is to have
2358          * one PEBS event and at least one non-PEBS event which overflows
2359          * while PEBS is armed. In this case, bit 62 of GLOBAL_STATUS will
2360          * not be set, yet the overflow status bit for the PEBS counter will
2361          * be set on Skylake.
2362          *
2363          * To avoid this problem, we systematically ignore the PEBS-enabled
2364          * counters from the GLOBAL_STATUS mask and we always process PEBS
2365          * events via drain_pebs().
2366          */
2367         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
2368                 status &= ~cpuc->pebs_enabled;
2369         else
2370                 status &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
2371 
2372         /*
2373          * PEBS overflow sets bit 62 in the global status register
2374          */
2375         if (__test_and_clear_bit(62, (unsigned long *)&status)) {
2376                 handled++;
2377                 x86_pmu.drain_pebs(regs);
2378                 status &= x86_pmu.intel_ctrl | GLOBAL_STATUS_TRACE_TOPAPMI;
2379         }
2380 
2381         /*
2382          * Intel PT
2383          */
2384         if (__test_and_clear_bit(55, (unsigned long *)&status)) {
2385                 handled++;
2386                 if (unlikely(perf_guest_cbs && perf_guest_cbs->is_in_guest() &&
2387                         perf_guest_cbs->handle_intel_pt_intr))
2388                         perf_guest_cbs->handle_intel_pt_intr();
2389                 else
2390                         intel_pt_interrupt();
2391         }
2392 
2393         /*
2394          * Checkpointed counters can lead to 'spurious' PMIs because the
2395          * rollback caused by the PMI will have cleared the overflow status
2396          * bit. Therefore always force probe these counters.
2397          */
2398         status |= cpuc->intel_cp_status;
2399 
2400         for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
2401                 struct perf_event *event = cpuc->events[bit];
2402 
2403                 handled++;
2404 
2405                 if (!test_bit(bit, cpuc->active_mask))
2406                         continue;
2407 
2408                 if (!intel_pmu_save_and_restart(event))
2409                         continue;
2410 
2411                 perf_sample_data_init(&data, 0, event->hw.last_period);
2412 
2413                 if (has_branch_stack(event))
2414                         data.br_stack = &cpuc->lbr_stack;
2415 
2416                 if (perf_event_overflow(event, &data, regs))
2417                         x86_pmu_stop(event, 0);
2418         }
2419 
2420         return handled;
2421 }
2422 
2423 static bool disable_counter_freezing = true;
2424 static int __init intel_perf_counter_freezing_setup(char *s)
2425 {
2426         bool res;
2427 
2428         if (kstrtobool(s, &res))
2429                 return -EINVAL;
2430 
2431         disable_counter_freezing = !res;
2432         return 1;
2433 }
2434 __setup("perf_v4_pmi=", intel_perf_counter_freezing_setup);
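     /*
      * disable_counter_freezing defaults to true, so counter freezing is
      * off unless the kernel is booted with e.g. "perf_v4_pmi=on" (any
      * value kstrtobool() treats as true); the simplified v4 handler
      * below relies on that freezing behaviour.
      */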
2435 
2436 /*
2437  * Simplified handler for Arch Perfmon v4:
2438  * - We rely on counter freezing/unfreezing to enable/disable the PMU.
2439  * This is done automatically on PMU ack.
2440  * - Ack the PMU only after the APIC.
2441  */
2442 
2443 static int intel_pmu_handle_irq_v4(struct pt_regs *regs)
2444 {
2445         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
2446         int handled = 0;
2447         bool bts = false;
2448         u64 status;
2449         int pmu_enabled = cpuc->enabled;
2450         int loops = 0;
2451 
2452         /* PMU has been disabled because of counter freezing */
2453         cpuc->enabled = 0;
2454         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
2455                 bts = true;
2456                 intel_bts_disable_local();
2457                 handled = intel_pmu_drain_bts_buffer();
2458                 handled += intel_bts_interrupt();
2459         }
2460         status = intel_pmu_get_status();
2461         if (!status)
2462                 goto done;
2463 again:
2464         intel_pmu_lbr_read();
2465         if (++loops > 100) {
2466                 static bool warned;
2467 
2468                 if (!warned) {
2469                         WARN(1, "perfevents: irq loop stuck!\n");
2470                         perf_event_print_debug();
2471                         warned = true;
2472                 }
2473                 intel_pmu_reset();
2474                 goto done;
2475         }
2476 
2477 
2478         handled += handle_pmi_common(regs, status);
2479 done:
2480         /* Ack the PMI in the APIC */
2481         apic_write(APIC_LVTPC, APIC_DM_NMI);
2482 
2483         /*
2484          * The counters start counting again as soon as the status is acked.
2485          * Make it as close as possible to IRET. This avoids bogus
2486          * freezing on Skylake CPUs.
2487          */
2488         if (status) {
2489                 intel_pmu_ack_status(status);
2490         } else {
2491                 /*
2492                  * The CPU may issue two PMIs very close to each other.
2493                  * When the PMI handler services the first one, the
2494                  * GLOBAL_STATUS is already updated to reflect both.
2495                  * When it IRETs, the second PMI is immediately
2496                  * handled and sees a clear status. In the meantime,
2497                  * there may be a third PMI, because the freezing bit
2498                  * is no longer set after the ack in the first PMI handler.
2499                  * Double check if there is more work to be done.
2500                  */
2501                 status = intel_pmu_get_status();
2502                 if (status)
2503                         goto again;
2504         }
2505 
2506         if (bts)
2507                 intel_bts_enable_local();
2508         cpuc->enabled = pmu_enabled;
2509         return handled;
2510 }
2511 
2512 /*
2513  * This handler is triggered by the local APIC, so the APIC IRQ handling
2514  * rules apply:
2515  */
2516 static int intel_pmu_handle_irq(struct pt_regs *regs)
2517 {
2518         struct cpu_hw_events *cpuc;
2519         int loops;
2520         u64 status;
2521         int handled;
2522         int pmu_enabled;
2523 
2524         cpuc = this_cpu_ptr(&cpu_hw_events);
2525 
2526         /*
2527          * Save the PMU state.
2528          * It needs to be restored when leaving the handler.
2529          */
2530         pmu_enabled = cpuc->enabled;
2531         /*
2532          * No known reason to not always do late ACK,
2533          * but just in case do it opt-in.
2534          */
2535         if (!x86_pmu.late_ack)
2536                 apic_write(APIC_LVTPC, APIC_DM_NMI);
2537         intel_bts_disable_local();
2538         cpuc->enabled = 0;
2539         __intel_pmu_disable_all();
2540         handled = intel_pmu_drain_bts_buffer();
2541         handled += intel_bts_interrupt();
2542         status = intel_pmu_get_status();
2543         if (!status)
2544                 goto done;
2545 
2546         loops = 0;
2547 again:
2548         intel_pmu_lbr_read();
2549         intel_pmu_ack_status(status);
2550         if (++loops > 100) {
2551                 static bool warned;
2552 
2553                 if (!warned) {
2554                         WARN(1, "perfevents: irq loop stuck!\n");
2555                         perf_event_print_debug();
2556                         warned = true;
2557                 }
2558                 intel_pmu_reset();
2559                 goto done;
2560         }
2561 
2562         handled += handle_pmi_common(regs, status);
2563 
2564         /*
2565          * Repeat if there is more work to be done:
2566          */
2567         status = intel_pmu_get_status();
2568         if (status)
2569                 goto again;
2570 
2571 done:
2572         /* Only restore PMU state when it's active. See x86_pmu_disable(). */
2573         cpuc->enabled = pmu_enabled;
2574         if (pmu_enabled)
2575                 __intel_pmu_enable_all(0, true);
2576         intel_bts_enable_local();
2577 
2578         /*
2579          * Only unmask the NMI after the overflow counters
2580          * have been reset. This avoids spurious NMIs on
2581          * Haswell CPUs.
2582          */
2583         if (x86_pmu.late_ack)
2584                 apic_write(APIC_LVTPC, APIC_DM_NMI);
2585         return handled;
2586 }
2587 
2588 static struct event_constraint *
2589 intel_bts_constraints(struct perf_event *event)
2590 {
2591         if (unlikely(intel_pmu_has_bts(event)))
2592                 return &bts_constraint;
2593 
2594         return NULL;
2595 }
2596 
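     /*
      * Map an OFFCORE_RSP extra-register index to its sibling
      * (RSP_0 <-> RSP_1), but only when the requested config bits are
      * also valid for that sibling; otherwise keep the original index.
      */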
2597 static int intel_alt_er(int idx, u64 config)
2598 {
2599         int alt_idx = idx;
2600 
2601         if (!(x86_pmu.flags & PMU_FL_HAS_RSP_1))
2602                 return idx;
2603 
2604         if (idx == EXTRA_REG_RSP_0)
2605                 alt_idx = EXTRA_REG_RSP_1;
2606 
2607         if (idx == EXTRA_REG_RSP_1)
2608                 alt_idx = EXTRA_REG_RSP_0;
2609 
2610         if (config & ~x86_pmu.extra_regs[alt_idx].valid_mask)
2611                 return idx;
2612 
2613         return alt_idx;
2614 }
2615 
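     /*
      * When an event is moved to the other OFFCORE_RSP MSR, rewrite its
      * event select to the matching OFFCORE_RESPONSE_N encoding and point
      * hw.extra_reg at the corresponding MSR.
      */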
2616 static void intel_fixup_er(struct perf_event *event, int idx)
2617 {
2618         event->hw.extra_reg.idx = idx;
2619 
2620         if (idx == EXTRA_REG_RSP_0) {
2621                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
2622                 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_0].event;
2623                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_0;
2624         } else if (idx == EXTRA_REG_RSP_1) {
2625                 event->hw.config &= ~INTEL_ARCH_EVENT_MASK;
2626                 event->hw.config |= x86_pmu.extra_regs[EXTRA_REG_RSP_1].event;
2627                 event->hw.extra_reg.reg = MSR_OFFCORE_RSP_1;
2628         }
2629 }
2630 
2631 /*
2632  * manage allocation of shared extra msr for certain events
2633  *
2634  * sharing can be:
2635  * per-cpu: to be shared between the various events on a single PMU
2636  * per-core: per-cpu + shared by HT threads
2637  */
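/*
 * For example (illustrative only): two events programming the same
 * offcore_rsp value can share MSR_OFFCORE_RSP_0 (era->ref is bumped for each),
 * while an event with a different value is steered to MSR_OFFCORE_RSP_1 via
 * intel_alt_er(), or receives the empty constraint if both registers are busy.
 */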
2638 static struct event_constraint *
2639 __intel_shared_reg_get_constraints(struct cpu_hw_events *cpuc,
2640                                    struct perf_event *event,
2641                                    struct hw_perf_event_extra *reg)
2642 {
2643         struct event_constraint *c = &emptyconstraint;
2644         struct er_account *era;
2645         unsigned long flags;
2646         int idx = reg->idx;
2647 
2648         /*
2649          * reg->alloc can be set due to existing state, so for fake cpuc we
2650          * need to ignore this, otherwise we might fail to allocate proper fake
2651          * state for this extra reg constraint. Also see the comment below.
2652          */
2653         if (reg->alloc && !cpuc->is_fake)
2654                 return NULL; /* call x86_get_event_constraints() */
2655 
2656 again:
2657         era = &cpuc->shared_regs->regs[idx];
2658         /*
2659          * we use raw_spin_lock_irqsave() to avoid lockdep issues when
2660          * passing a fake cpuc
2661          */
2662         raw_spin_lock_irqsave(&era->lock, flags);
2663 
2664         if (!atomic_read(&era->ref) || era->config == reg->config) {
2665 
2666                 /*
2667                  * If it's a fake cpuc -- as per validate_{group,event}() we
2668                  * shouldn't touch event state and we can avoid doing so
2669                  * since both will only call get_event_constraints() once
2670                  * on each event, this avoids the need for reg->alloc.
2671                  *
2672                  * Not doing the ER fixup will only result in era->reg being
2673                  * wrong, but since we won't actually try to program hardware
2674                  * this isn't a problem either.
2675                  */
2676                 if (!cpuc->is_fake) {
2677                         if (idx != reg->idx)
2678                                 intel_fixup_er(event, idx);
2679 
2680                         /*
2681                          * x86_schedule_events() can call get_event_constraints()
2682                          * multiple times on events in the case of incremental
2683                  * scheduling. reg->alloc ensures we only do the ER
2684                          * allocation once.
2685                          */
2686                         reg->alloc = 1;
2687                 }
2688 
2689                 /* lock in msr value */
2690                 era->config = reg->config;
2691                 era->reg = reg->reg;
2692 
2693                 /* one more user */
2694                 atomic_inc(&era->ref);
2695 
2696                 /*
2697                  * need to call x86_get_event_constraints()
2698                  * to check if associated event has constraints
2699                  */
2700                 c = NULL;
2701         } else {
2702                 idx = intel_alt_er(idx, reg->config);
2703                 if (idx != reg->idx) {
2704                         raw_spin_unlock_irqrestore(&era->lock, flags);
2705                         goto again;
2706                 }
2707         }
2708         raw_spin_unlock_irqrestore(&era->lock, flags);
2709 
2710         return c;
2711 }
2712 
2713 static void
2714 __intel_shared_reg_put_constraints(struct cpu_hw_events *cpuc,
2715                                    struct hw_perf_event_extra *reg)
2716 {
2717         struct er_account *era;
2718 
2719         /*
2720          * Only put constraint if extra reg was actually allocated. Also takes
2721                  * care of events which do not use an extra shared reg.
2722          *
2723          * Also, if this is a fake cpuc we shouldn't touch any event state
2724          * (reg->alloc) and we don't care about leaving inconsistent cpuc state
2725          * either since it'll be thrown out.
2726          */
2727         if (!reg->alloc || cpuc->is_fake)
2728                 return;
2729 
2730         era = &cpuc->shared_regs->regs[reg->idx];
2731 
2732         /* one fewer user */
2733         atomic_dec(&era->ref);
2734 
2735         /* allocate again next time */
2736         reg->alloc = 0;
2737 }
2738 
2739 static struct event_constraint *
2740 intel_shared_regs_constraints(struct cpu_hw_events *cpuc,
2741                               struct perf_event *event)
2742 {
2743         struct event_constraint *c = NULL, *d;
2744         struct hw_perf_event_extra *xreg, *breg;
2745 
2746         xreg = &event->hw.extra_reg;
2747         if (xreg->idx != EXTRA_REG_NONE) {
2748                 c = __intel_shared_reg_get_constraints(cpuc, event, xreg);
2749                 if (c == &emptyconstraint)
2750                         return c;
2751         }
2752         breg = &event->hw.branch_reg;
2753         if (breg->idx != EXTRA_REG_NONE) {
2754                 d = __intel_shared_reg_get_constraints(cpuc, event, breg);
2755                 if (d == &emptyconstraint) {
2756                         __intel_shared_reg_put_constraints(cpuc, xreg);
2757                         c = d;
2758                 }
2759         }
2760         return c;
2761 }
2762 
2763 struct event_constraint *
2764 x86_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
2765                           struct perf_event *event)
2766 {
2767         struct event_constraint *c;
2768 
2769         if (x86_pmu.event_constraints) {
2770                 for_each_event_constraint(c, x86_pmu.event_constraints) {
2771                         if (constraint_match(c, event->hw.config)) {
2772                                 event->hw.flags |= c->flags;
2773                                 return c;
2774                         }
2775                 }
2776         }
2777 
2778         return &unconstrained;
2779 }
2780 
2781 static struct event_constraint *
2782 __intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
2783                             struct perf_event *event)
2784 {
2785         struct event_constraint *c;
2786 
2787         c = intel_bts_constraints(event);
2788         if (c)
2789                 return c;
2790 
2791         c = intel_shared_regs_constraints(cpuc, event);
2792         if (c)
2793                 return c;
2794 
2795         c = intel_pebs_constraints(event);
2796         if (c)
2797                 return c;
2798 
2799         return x86_get_event_constraints(cpuc, idx, event);
2800 }
2801 
2802 static void
2803 intel_start_scheduling(struct cpu_hw_events *cpuc)
2804 {
2805         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2806         struct intel_excl_states *xl;
2807         int tid = cpuc->excl_thread_id;
2808 
2809         /*
2810          * nothing needed if in group validation mode
2811          */
2812         if (cpuc->is_fake || !is_ht_workaround_enabled())
2813                 return;
2814 
2815         /*
2816          * no exclusion needed
2817          */
2818         if (WARN_ON_ONCE(!excl_cntrs))
2819                 return;
2820 
2821         xl = &excl_cntrs->states[tid];
2822 
2823         xl->sched_started = true;
2824         /*
2825          * lock shared state until we are done scheduling
2826          * in stop_event_scheduling()
2827          * makes scheduling appear as a transaction
2828          */
2829         raw_spin_lock(&excl_cntrs->lock);
2830 }
2831 
2832 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
2833 {
2834         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2835         struct event_constraint *c = cpuc->event_constraint[idx];
2836         struct intel_excl_states *xl;
2837         int tid = cpuc->excl_thread_id;
2838 
2839         if (cpuc->is_fake || !is_ht_workaround_enabled())
2840                 return;
2841 
2842         if (WARN_ON_ONCE(!excl_cntrs))
2843                 return;
2844 
2845         if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
2846                 return;
2847 
2848         xl = &excl_cntrs->states[tid];
2849 
2850         lockdep_assert_held(&excl_cntrs->lock);
2851 
2852         if (c->flags & PERF_X86_EVENT_EXCL)
2853                 xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
2854         else
2855                 xl->state[cntr] = INTEL_EXCL_SHARED;
2856 }
2857 
2858 static void
2859 intel_stop_scheduling(struct cpu_hw_events *cpuc)
2860 {
2861         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2862         struct intel_excl_states *xl;
2863         int tid = cpuc->excl_thread_id;
2864 
2865         /*
2866          * nothing needed if in group validation mode
2867          */
2868         if (cpuc->is_fake || !is_ht_workaround_enabled())
2869                 return;
2870         /*
2871          * no exclusion needed
2872          */
2873         if (WARN_ON_ONCE(!excl_cntrs))
2874                 return;
2875 
2876         xl = &excl_cntrs->states[tid];
2877 
2878         xl->sched_started = false;
2879         /*
2880          * release shared state lock (acquired in intel_start_scheduling())
2881          */
2882         raw_spin_unlock(&excl_cntrs->lock);
2883 }
2884 
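/*
 * Clone a (possibly static, const) constraint into the pre-allocated per-CPU
 * constraint_list slot for this event so that it can be modified, e.g. to
 * drop counters claimed by the sibling thread or PMC3 under TFA.
 */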
2885 static struct event_constraint *
2886 dyn_constraint(struct cpu_hw_events *cpuc, struct event_constraint *c, int idx)
2887 {
2888         WARN_ON_ONCE(!cpuc->constraint_list);
2889 
2890         if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
2891                 struct event_constraint *cx;
2892 
2893                 /*
2894                  * grab pre-allocated constraint entry
2895                  */
2896                 cx = &cpuc->constraint_list[idx];
2897 
2898                 /*
2899                  * initialize dynamic constraint
2900                  * with static constraint
2901                  */
2902                 *cx = *c;
2903 
2904                 /*
2905                  * mark constraint as dynamic
2906                  */
2907                 cx->flags |= PERF_X86_EVENT_DYNAMIC;
2908                 c = cx;
2909         }
2910 
2911         return c;
2912 }
2913 
2914 static struct event_constraint *
2915 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
2916                            int idx, struct event_constraint *c)
2917 {
2918         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
2919         struct intel_excl_states *xlo;
2920         int tid = cpuc->excl_thread_id;
2921         int is_excl, i, w;
2922 
2923         /*
2924          * validating a group does not require
2925          * enforcing cross-thread  exclusion
2926          * enforcing cross-thread exclusion
2927         if (cpuc->is_fake || !is_ht_workaround_enabled())
2928                 return c;
2929 
2930         /*
2931          * no exclusion needed
2932          */
2933         if (WARN_ON_ONCE(!excl_cntrs))
2934                 return c;
2935 
2936         /*
2937          * because we modify the constraint, we need
2938          * to make a copy. Static constraints come
2939          * from static const tables.
2940          *
2941          * only needed when constraint has not yet
2942          * been cloned (marked dynamic)
2943          */
2944         c = dyn_constraint(cpuc, c, idx);
2945 
2946         /*
2947          * From here on, the constraint is dynamic.
2948          * Either it was just allocated above, or it
2949          * was allocated during an earlier invocation
2950          * of this function
2951          */
2952 
2953         /*
2954          * state of sibling HT
2955          */
2956         xlo = &excl_cntrs->states[tid ^ 1];
2957 
2958         /*
2959          * event requires exclusive counter access
2960          * across HT threads
2961          */
2962         is_excl = c->flags & PERF_X86_EVENT_EXCL;
2963         if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
2964                 event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
2965                 if (!cpuc->n_excl++)
2966                         WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
2967         }
2968 
2969         /*
2970          * Modify static constraint with current dynamic
2971          * state of thread
2972          *
2973          * EXCLUSIVE: sibling counter measuring exclusive event
2974          * SHARED   : sibling counter measuring non-exclusive event
2975          * UNUSED   : sibling counter unused
2976          */
2977         w = c->weight;
2978         for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
2979                 /*
2980                  * if the sibling thread has an exclusive event on this
2981                  * counter, our corresponding counter cannot be used,
2982                  * regardless of our event
2983                  */
2984                 if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE) {
2985                         __clear_bit(i, c->idxmsk);
2986                         w--;
2987                         continue;
2988                 }
2989                 /*
2990                  * if we are measuring an exclusive event and the sibling
2991                  * is measuring a non-exclusive one, then the counter
2992                  * cannot be used
2993                  */
2994                 if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED) {
2995                         __clear_bit(i, c->idxmsk);
2996                         w--;
2997                         continue;
2998                 }
2999         }
3000 
3001         /*
3002          * if we return an empty mask, then switch
3003          * back to static empty constraint to avoid
3004          * the cost of freeing later on
3005          */
3006         if (!w)
3007                 c = &emptyconstraint;
3008 
3009         c->weight = w;
3010 
3011         return c;
3012 }
3013 
3014 static struct event_constraint *
3015 intel_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3016                             struct perf_event *event)
3017 {
3018         struct event_constraint *c1, *c2;
3019 
3020         c1 = cpuc->event_constraint[idx];
3021 
3022         /*
3023          * first time only
3024          * - static constraint: no change across incremental scheduling calls
3025          * - dynamic constraint: handled by intel_get_excl_constraints()
3026          */
3027         c2 = __intel_get_event_constraints(cpuc, idx, event);
3028         if (c1) {
3029                 WARN_ON_ONCE(!(c1->flags & PERF_X86_EVENT_DYNAMIC));
3030                 bitmap_copy(c1->idxmsk, c2->idxmsk, X86_PMC_IDX_MAX);
3031                 c1->weight = c2->weight;
3032                 c2 = c1;
3033         }
3034 
3035         if (cpuc->excl_cntrs)
3036                 return intel_get_excl_constraints(cpuc, event, idx, c2);
3037 
3038         return c2;
3039 }
3040 
3041 static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
3042                 struct perf_event *event)
3043 {
3044         struct hw_perf_event *hwc = &event->hw;
3045         struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
3046         int tid = cpuc->excl_thread_id;
3047         struct intel_excl_states *xl;
3048 
3049         /*
3050          * nothing needed if in group validation mode
3051          */
3052         if (cpuc->is_fake)
3053                 return;
3054 
3055         if (WARN_ON_ONCE(!excl_cntrs))
3056                 return;
3057 
3058         if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
3059                 hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
3060                 if (!--cpuc->n_excl)
3061                         WRITE_ONCE(excl_cntrs->has_exclusive[tid], 0);
3062         }
3063 
3064         /*
3065          * If event was actually assigned, then mark the counter state as
3066          * unused now.
3067          */
3068         if (hwc->idx >= 0) {
3069                 xl = &excl_cntrs->states[tid];
3070 
3071                 /*
3072                  * put_constraint may be called from x86_schedule_events()
3073                  * which already holds the lock, so make the locking
3074                  * conditional here.
3075                  */
3076                 if (!xl->sched_started)
3077                         raw_spin_lock(&excl_cntrs->lock);
3078 
3079                 xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
3080 
3081                 if (!xl->sched_started)
3082                         raw_spin_unlock(&excl_cntrs->lock);
3083         }
3084 }
3085 
3086 static void
3087 intel_put_shared_regs_event_constraints(struct cpu_hw_events *cpuc,
3088                                         struct perf_event *event)
3089 {
3090         struct hw_perf_event_extra *reg;
3091 
3092         reg = &event->hw.extra_reg;
3093         if (reg->idx != EXTRA_REG_NONE)
3094                 __intel_shared_reg_put_constraints(cpuc, reg);
3095 
3096         reg = &event->hw.branch_reg;
3097         if (reg->idx != EXTRA_REG_NONE)
3098                 __intel_shared_reg_put_constraints(cpuc, reg);
3099 }
3100 
3101 static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
3102                                         struct perf_event *event)
3103 {
3104         intel_put_shared_regs_event_constraints(cpuc, event);
3105 
3106         /*
3107          * if the PMU has exclusive counter restrictions, then
3108          * all events are subject to them and must call the
3109          * put_excl_constraints() routine
3110          */
3111         if (cpuc->excl_cntrs)
3112                 intel_put_excl_constraints(cpuc, event);
3113 }
3114 
3115 static void intel_pebs_aliases_core2(struct perf_event *event)
3116 {
3117         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3118                 /*
3119                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3120                  * (0x003c) so that we can use it with PEBS.
3121                  *
3122                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3123                  * PEBS capable. However we can use INST_RETIRED.ANY_P
3124                  * (0x00c0), which is a PEBS capable event, to get the same
3125                  * count.
3126                  *
3127                  * INST_RETIRED.ANY_P counts the number of cycles that retire
3128                  * at least CNTMASK instructions. By setting CNTMASK to a value (16)
3129                  * larger than the maximum number of instructions that can be
3130                  * retired per cycle (4) and then inverting the condition, we
3131                  * count all cycles that retire 16 or fewer instructions, which
3132                  * is every cycle.
3133                  *
3134                  * Thereby we gain a PEBS capable cycle counter.
3135                  */
3136                 u64 alt_config = X86_CONFIG(.event=0xc0, .inv=1, .cmask=16);
3137 
3138                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3139                 event->hw.config = alt_config;
3140         }
3141 }
3142 
3143 static void intel_pebs_aliases_snb(struct perf_event *event)
3144 {
3145         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3146                 /*
3147                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3148                  * (0x003c) so that we can use it with PEBS.
3149                  *
3150                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3151                  * PEBS capable. However we can use UOPS_RETIRED.ALL
3152                  * (0x01c2), which is a PEBS capable event, to get the same
3153                  * count.
3154                  *
3155                  * UOPS_RETIRED.ALL counts the number of cycles that retire
3156                  * at least CNTMASK micro-ops. By setting CNTMASK to a value (16)
3157                  * larger than the maximum number of micro-ops that can be
3158                  * retired per cycle (4) and then inverting the condition, we
3159                  * count all cycles that retire 16 or fewer micro-ops, which
3160                  * is every cycle.
3161                  *
3162                  * Thereby we gain a PEBS capable cycle counter.
3163                  */
3164                 u64 alt_config = X86_CONFIG(.event=0xc2, .umask=0x01, .inv=1, .cmask=16);
3165 
3166                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3167                 event->hw.config = alt_config;
3168         }
3169 }
3170 
3171 static void intel_pebs_aliases_precdist(struct perf_event *event)
3172 {
3173         if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
3174                 /*
3175                  * Use an alternative encoding for CPU_CLK_UNHALTED.THREAD_P
3176                  * (0x003c) so that we can use it with PEBS.
3177                  *
3178                  * The regular CPU_CLK_UNHALTED.THREAD_P event (0x003c) isn't
3179                  * PEBS capable. However we can use INST_RETIRED.PREC_DIST
3180                  * (0x01c0), which is a PEBS capable event, to get the same
3181                  * count.
3182                  *
3183                  * The PREC_DIST event has special support to minimize sample
3184                  * shadowing effects. One drawback is that it can only be
3185                  * programmed on counter 1, but that seems like an
3186                  * acceptable trade off.
3187                  */
3188                 u64 alt_config = X86_CONFIG(.event=0xc0, .umask=0x01, .inv=1, .cmask=16);
3189 
3190                 alt_config |= (event->hw.config & ~X86_RAW_EVENT_MASK);
3191                 event->hw.config = alt_config;
3192         }
3193 }
3194 
3195 static void intel_pebs_aliases_ivb(struct perf_event *event)
3196 {
3197         if (event->attr.precise_ip < 3)
3198                 return intel_pebs_aliases_snb(event);
3199         return intel_pebs_aliases_precdist(event);
3200 }
3201 
3202 static void intel_pebs_aliases_skl(struct perf_event *event)
3203 {
3204         if (event->attr.precise_ip < 3)
3205                 return intel_pebs_aliases_core2(event);
3206         return intel_pebs_aliases_precdist(event);
3207 }
3208 
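/*
 * Sample flags that large/multi-entry PEBS cannot reconstruct for this event
 * are masked out here; intel_pmu_hw_config() only sets
 * PERF_X86_EVENT_LARGE_PEBS when the event's sample_type fits within the
 * remaining flags.
 */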
3209 static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event)
3210 {
3211         unsigned long flags = x86_pmu.large_pebs_flags;
3212 
3213         if (event->attr.use_clockid)
3214                 flags &= ~PERF_SAMPLE_TIME;
3215         if (!event->attr.exclude_kernel)
3216                 flags &= ~PERF_SAMPLE_REGS_USER;
3217         if (event->attr.sample_regs_user & ~PEBS_GP_REGS)
3218                 flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
3219         return flags;
3220 }
3221 
3222 static int intel_pmu_bts_config(struct perf_event *event)
3223 {
3224         struct perf_event_attr *attr = &event->attr;
3225 
3226         if (unlikely(intel_pmu_has_bts(event))) {
3227                 /* BTS is not supported by this architecture. */
3228                 if (!x86_pmu.bts_active)
3229                         return -EOPNOTSUPP;
3230 
3231                 /* BTS is currently only allowed for user-mode. */
3232                 if (!attr->exclude_kernel)
3233                         return -EOPNOTSUPP;
3234 
3235                 /* BTS is not allowed for precise events. */
3236                 if (attr->precise_ip)
3237                         return -EOPNOTSUPP;
3238 
3239                 /* disallow bts if conflicting events are present */
3240                 if (x86_add_exclusive(x86_lbr_exclusive_lbr))
3241                         return -EBUSY;
3242 
3243                 event->destroy = hw_perf_lbr_event_destroy;
3244         }
3245 
3246         return 0;
3247 }
3248 
3249 static int core_pmu_hw_config(struct perf_event *event)
3250 {
3251         int ret = x86_pmu_hw_config(event);
3252 
3253         if (ret)
3254                 return ret;
3255 
3256         return intel_pmu_bts_config(event);
3257 }
3258 
3259 static int intel_pmu_hw_config(struct perf_event *event)
3260 {
3261         int ret = x86_pmu_hw_config(event);
3262 
3263         if (ret)
3264                 return ret;
3265 
3266         ret = intel_pmu_bts_config(event);
3267         if (ret)
3268                 return ret;
3269 
3270         if (event->attr.precise_ip) {
3271                 if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
3272                         event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
3273                         if (!(event->attr.sample_type &
3274                               ~intel_pmu_large_pebs_flags(event)))
3275                                 event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
3276                 }
3277                 if (x86_pmu.pebs_aliases)
3278                         x86_pmu.pebs_aliases(event);
3279 
3280                 if (event->attr.sample_type & PERF_SAMPLE_CALLCHAIN)
3281                         event->attr.sample_type |= __PERF_SAMPLE_CALLCHAIN_EARLY;
3282         }
3283 
3284         if (needs_branch_stack(event)) {
3285                 ret = intel_pmu_setup_lbr_filter(event);
3286                 if (ret)
3287                         return ret;
3288 
3289                 /*
3290                  * BTS is set up earlier in this path, so don't account twice
3291                  */
3292                 if (!unlikely(intel_pmu_has_bts(event))) {
3293                         /* disallow lbr if conflicting events are present */
3294                         if (x86_add_exclusive(x86_lbr_exclusive_lbr))
3295                                 return -EBUSY;
3296 
3297                         event->destroy = hw_perf_lbr_event_destroy;
3298                 }
3299         }
3300 
3301         if (event->attr.type != PERF_TYPE_RAW)
3302                 return 0;
3303 
3304         if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY))
3305                 return 0;
3306 
3307         if (x86_pmu.version < 3)
3308                 return -EINVAL;
3309 
3310         if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
3311                 return -EACCES;
3312 
3313         event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
3314 
3315         return 0;
3316 }
3317 
3318 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr)
3319 {
3320         if (x86_pmu.guest_get_msrs)
3321                 return x86_pmu.guest_get_msrs(nr);
3322         *nr = 0;
3323         return NULL;
3324 }
3325 EXPORT_SYMBOL_GPL(perf_guest_get_msrs);
3326 
3327 static struct perf_guest_switch_msr *intel_guest_get_msrs(int *nr)
3328 {
3329         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3330         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
3331 
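        /*
         * MSR_CORE_PERF_GLOBAL_CTRL: on the host keep everything except the
         * counters claimed exclusively for the guest, and on guest entry keep
         * everything except the host-only counters; PEBS-enabled counters are
         * additionally cleared from the guest value below.
         */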
3332         arr[0].msr = MSR_CORE_PERF_GLOBAL_CTRL;
3333         arr[0].host = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_guest_mask;
3334         arr[0].guest = x86_pmu.intel_ctrl & ~cpuc->intel_ctrl_host_mask;
3335         if (x86_pmu.flags & PMU_FL_PEBS_ALL)
3336                 arr[0].guest &= ~cpuc->pebs_enabled;
3337         else
3338                 arr[0].guest &= ~(cpuc->pebs_enabled & PEBS_COUNTER_MASK);
3339         *nr = 1;
3340 
3341         if (x86_pmu.pebs && x86_pmu.pebs_no_isolation) {
3342                 /*
3343                  * If a PMU counter has PEBS enabled, it is not enough to
3344                  * disable the counter on guest entry, since a PEBS memory
3345                  * write can overshoot guest entry and corrupt guest
3346                  * memory. Disabling PEBS solves the problem.
3347                  *
3348                  * Don't do this if the CPU already enforces it.
3349                  */
3350                 arr[1].msr = MSR_IA32_PEBS_ENABLE;
3351                 arr[1].host = cpuc->pebs_enabled;
3352                 arr[1].guest = 0;
3353                 *nr = 2;
3354         }
3355 
3356         return arr;
3357 }
3358 
3359 static struct perf_guest_switch_msr *core_guest_get_msrs(int *nr)
3360 {
3361         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3362         struct perf_guest_switch_msr *arr = cpuc->guest_switch_msrs;
3363         int idx;
3364 
3365         for (idx = 0; idx < x86_pmu.num_counters; idx++)  {
3366                 struct perf_event *event = cpuc->events[idx];
3367 
3368                 arr[idx].msr = x86_pmu_config_addr(idx);
3369                 arr[idx].host = arr[idx].guest = 0;
3370 
3371                 if (!test_bit(idx, cpuc->active_mask))
3372                         continue;
3373 
3374                 arr[idx].host = arr[idx].guest =
3375                         event->hw.config | ARCH_PERFMON_EVENTSEL_ENABLE;
3376 
3377                 if (event->attr.exclude_host)
3378                         arr[idx].host &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
3379                 else if (event->attr.exclude_guest)
3380                         arr[idx].guest &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
3381         }
3382 
3383         *nr = x86_pmu.num_counters;
3384         return arr;
3385 }
3386 
3387 static void core_pmu_enable_event(struct perf_event *event)
3388 {
3389         if (!event->attr.exclude_host)
3390                 x86_pmu_enable_event(event);
3391 }
3392 
3393 static void core_pmu_enable_all(int added)
3394 {
3395         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
3396         int idx;
3397 
3398         for (idx = 0; idx < x86_pmu.num_counters; idx++) {
3399                 struct hw_perf_event *hwc = &cpuc->events[idx]->hw;
3400 
3401                 if (!test_bit(idx, cpuc->active_mask) ||
3402                                 cpuc->events[idx]->attr.exclude_host)
3403                         continue;
3404 
3405                 __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
3406         }
3407 }
3408 
3409 static int hsw_hw_config(struct perf_event *event)
3410 {
3411         int ret = intel_pmu_hw_config(event);
3412 
3413         if (ret)
3414                 return ret;
3415         if (!boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has(X86_FEATURE_HLE))
3416                 return 0;
3417         event->hw.config |= event->attr.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED);
3418 
3419         /*
3420          * IN_TX/IN_TX-CP filters are not supported by the Haswell PMU with
3421          * PEBS or in ANY thread mode. Since the results are nonsensical,
3422          * forbid this combination.
3423          */
3424         if ((event->hw.config & (HSW_IN_TX|HSW_IN_TX_CHECKPOINTED)) &&
3425              ((event->hw.config & ARCH_PERFMON_EVENTSEL_ANY) ||
3426               event->attr.precise_ip > 0))
3427                 return -EOPNOTSUPP;
3428 
3429         if (event_is_checkpointed(event)) {
3430                 /*
3431                  * Sampling of checkpointed events can cause situations where
3432                  * the CPU constantly aborts because of an overflow, which is
3433                  * then checkpointed back and ignored. Forbid checkpointing
3434                  * for sampling.
3435                  *
3436                  * But still allow a long sampling period, so that perf stat
3437                  * from KVM works.
3438                  */
3439                 if (event->attr.sample_period > 0 &&
3440                     event->attr.sample_period < 0x7fffffff)
3441                         return -EOPNOTSUPP;
3442         }
3443         return 0;
3444 }
3445 
3446 static struct event_constraint counter0_constraint =
3447                         INTEL_ALL_EVENT_CONSTRAINT(0, 0x1);
3448 
3449 static struct event_constraint counter2_constraint =
3450                         EVENT_CONSTRAINT(0, 0x4, 0);
3451 
3452 static struct event_constraint fixed0_constraint =
3453                         FIXED_EVENT_CONSTRAINT(0x00c0, 0);
3454 
3455 static struct event_constraint fixed0_counter0_constraint =
3456                         INTEL_ALL_EVENT_CONSTRAINT(0, 0x100000001ULL);
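/*
 * In the constraint counter masks above, bits 0-31 select general-purpose
 * counters and bits 32 and up select fixed counters, so 0x100000001ULL allows
 * either fixed counter 0 (bit 32) or GP counter 0 (bit 0).
 */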
3457 
3458 static struct event_constraint *
3459 hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3460                           struct perf_event *event)
3461 {
3462         struct event_constraint *c;
3463 
3464         c = intel_get_event_constraints(cpuc, idx, event);
3465 
3466         /* Handle special quirk on in_tx_checkpointed only in counter 2 */
3467         if (event->hw.config & HSW_IN_TX_CHECKPOINTED) {
3468                 if (c->idxmsk64 & (1U << 2))
3469                         return &counter2_constraint;
3470                 return &emptyconstraint;
3471         }
3472 
3473         return c;
3474 }
3475 
3476 static struct event_constraint *
3477 icl_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3478                           struct perf_event *event)
3479 {
3480         /*
3481          * Fixed counter 0 has less skid.
3482          * Force instruction:ppp in Fixed counter 0
3483          */
3484         if ((event->attr.precise_ip == 3) &&
3485             constraint_match(&fixed0_constraint, event->hw.config))
3486                 return &fixed0_constraint;
3487 
3488         return hsw_get_event_constraints(cpuc, idx, event);
3489 }
3490 
3491 static struct event_constraint *
3492 glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3493                           struct perf_event *event)
3494 {
3495         struct event_constraint *c;
3496 
3497         /* :ppp means to do reduced skid PEBS which is PMC0 only. */
3498         if (event->attr.precise_ip == 3)
3499                 return &counter0_constraint;
3500 
3501         c = intel_get_event_constraints(cpuc, idx, event);
3502 
3503         return c;
3504 }
3505 
3506 static struct event_constraint *
3507 tnt_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3508                           struct perf_event *event)
3509 {
3510         struct event_constraint *c;
3511 
3512         /*
3513          * :ppp means to do reduced skid PEBS,
3514          * which is available on PMC0 and fixed counter 0.
3515          */
3516         if (event->attr.precise_ip == 3) {
3517                 /* Force instruction:ppp on PMC0 and Fixed counter 0 */
3518                 if (constraint_match(&fixed0_constraint, event->hw.config))
3519                         return &fixed0_counter0_constraint;
3520 
3521                 return &counter0_constraint;
3522         }
3523 
3524         c = intel_get_event_constraints(cpuc, idx, event);
3525 
3526         return c;
3527 }
3528 
3529 static bool allow_tsx_force_abort = true;
3530 
3531 static struct event_constraint *
3532 tfa_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
3533                           struct perf_event *event)
3534 {
3535         struct event_constraint *c = hsw_get_event_constraints(cpuc, idx, event);
3536 
3537         /*
3538          * Without TFA we must not use PMC3.
3539          */
3540         if (!allow_tsx_force_abort && test_bit(3, c->idxmsk)) {
3541                 c = dyn_constraint(cpuc, c, idx);
3542                 c->idxmsk64 &= ~(1ULL << 3);
3543                 c->weight--;
3544         }
3545 
3546         return c;
3547 }
3548 
3549 /*
3550  * Broadwell:
3551  *
3552  * The INST_RETIRED.ALL period always needs to have lowest 6 bits cleared
3553  * (BDM55) and it must not use a period smaller than 100 (BDM11). We combine
3554  * the two to enforce a minimum period of 128 (the smallest value that has bits
3555  * 0-5 cleared and >= 100).
3556  *
3557  * Because of how the code in x86_perf_event_set_period() works, the truncation
3558  * of the lower 6 bits is 'harmless' as we'll occasionally add a longer period
3559  * to make up for the 'lost' events due to carrying the 'error' in period_left.
3560  *
3561  * Therefore the effective (average) period matches the requested period,
3562  * despite coarser hardware granularity.
3563  */
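/*
 * For example, a requested period of 100 becomes 128 and a requested period
 * of 200 becomes 192: both are multiples of 64, satisfying BDM55, and both
 * stay at or above the BDM11 minimum.
 */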
3564 static u64 bdw_limit_period(struct perf_event *event, u64 left)
3565 {
3566         if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
3567                         X86_CONFIG(.event=0xc0, .umask=0x01)) {
3568                 if (left < 128)
3569                         left = 128;
3570                 left &= ~0x3fULL;
3571         }
3572         return left;
3573 }
3574 
3575 static u64 nhm_limit_period(struct perf_event *event, u64 left)
3576 {
3577         return max(left, 32ULL);
3578 }
3579 
3580 PMU_FORMAT_ATTR(event,  "config:0-7"    );
3581 PMU_FORMAT_ATTR(umask,  "config:8-15"   );
3582 PMU_FORMAT_ATTR(edge,   "config:18"     );
3583 PMU_FORMAT_ATTR(pc,     "config:19"     );
3584 PMU_FORMAT_ATTR(any,    "config:21"     ); /* v3 + */
3585 PMU_FORMAT_ATTR(inv,    "config:23"     );
3586 PMU_FORMAT_ATTR(cmask,  "config:24-31"  );
3587 PMU_FORMAT_ATTR(in_tx,  "config:32");
3588 PMU_FORMAT_ATTR(in_tx_cp, "config:33");
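
/*
 * These format attributes are exported under
 * /sys/bus/event_source/devices/cpu/format/ and describe how fields of a raw
 * event encoding map onto perf_event_attr::config, e.g. (illustrative only)
 * "cpu/event=0xc0,umask=0x01,cmask=16,inv/".
 */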
3589 
3590 static struct attribute *intel_arch_formats_attr[] = {
3591         &format_attr_event.attr,
3592         &format_attr_umask.attr,
3593         &format_attr_edge.attr,
3594         &format_attr_pc.attr,
3595         &format_attr_inv.attr,
3596         &format_attr_cmask.attr,
3597         NULL,
3598 };
3599 
3600 ssize_t intel_event_sysfs_show(char *page, u64 config)
3601 {
3602         u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT);
3603 
3604         return x86_event_sysfs_show(page, config, event);
3605 }
3606 
3607 static struct intel_shared_regs *allocate_shared_regs(int cpu)
3608 {
3609         struct intel_shared_regs *regs;
3610         int i;
3611 
3612         regs = kzalloc_node(sizeof(struct intel_shared_regs),
3613                             GFP_KERNEL, cpu_to_node(cpu));
3614         if (regs) {
3615                 /*
3616                  * initialize the locks to keep lockdep happy
3617                  */
3618                 for (i = 0; i < EXTRA_REG_MAX; i++)
3619                         raw_spin_lock_init(&regs->regs[i].lock);
3620 
3621                 regs->core_id = -1;
3622         }
3623         return regs;
3624 }
3625 
3626 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
3627 {
3628         struct intel_excl_cntrs *c;
3629 
3630         c = kzalloc_node(sizeof(struct intel_excl_cntrs),
3631                          GFP_KERNEL, cpu_to_node(cpu));
3632         if (c) {
3633                 raw_spin_lock_init(&c->lock);
3634                 c->core_id = -1;
3635         }
3636         return c;
3637 }
3638 
3639 
3640 int intel_cpuc_prepare(struct cpu_hw_events *cpuc, int cpu)
3641 {
3642         cpuc->pebs_record_size = x86_pmu.pebs_record_size;
3643 
3644         if (x86_pmu.extra_regs || x86_pmu.lbr_sel_map) {
3645                 cpuc->shared_regs = allocate_shared_regs(cpu);
3646                 if (!cpuc->shared_regs)
3647                         goto err;
3648         }
3649 
3650         if (x86_pmu.flags & (PMU_FL_EXCL_CNTRS | PMU_FL_TFA)) {
3651                 size_t sz = X86_PMC_IDX_MAX * sizeof(struct event_constraint);
3652 
3653                 cpuc->constraint_list = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu));
3654                 if (!cpuc->constraint_list)
3655                         goto err_shared_regs;
3656         }
3657 
3658         if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
3659                 cpuc->excl_cntrs = allocate_excl_cntrs(cpu);
3660                 if (!cpuc->excl_cntrs)
3661                         goto err_constraint_list;
3662 
3663                 cpuc->excl_thread_id = 0;
3664         }
3665 
3666         return 0;
3667 
3668 err_constraint_list:
3669         kfree(cpuc->constraint_list);
3670         cpuc->constraint_list = NULL;
3671 
3672 err_shared_regs:
3673         kfree(cpuc->shared_regs);
3674         cpuc->shared_regs = NULL;
3675 
3676 err:
3677         return -ENOMEM;
3678 }
3679 
3680 static int intel_pmu_cpu_prepare(int cpu)
3681 {
3682         return intel_cpuc_prepare(&per_cpu(cpu_hw_events, cpu), cpu);
3683 }
3684 
3685 static void flip_smm_bit(void *data)
3686 {
3687         unsigned long set = *(unsigned long *)data;
3688 
3689         if (set > 0) {
3690                 msr_set_bit(MSR_IA32_DEBUGCTLMSR,
3691                             DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
3692         } else {
3693                 msr_clear_bit(MSR_IA32_DEBUGCTLMSR,
3694                               DEBUGCTLMSR_FREEZE_IN_SMM_BIT);
3695         }
3696 }
3697 
3698 static void intel_pmu_cpu_starting(int cpu)
3699 {
3700         struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu);
3701         int core_id = topology_core_id(cpu);
3702         int i;
3703 
3704         init_debug_store_on_cpu(cpu);
3705         /*
3706          * Deal with CPUs that don't clear their LBRs on power-up.
3707          */
3708         intel_pmu_lbr_reset();
3709 
3710         cpuc->lbr_sel = NULL;
3711 
3712         if (x86_pmu.flags & PMU_FL_TFA) {
3713                 WARN_ON_ONCE(cpuc->tfa_shadow);
3714                 cpuc->tfa_shadow = ~0ULL;
3715                 intel_set_tfa(cpuc, false);
3716         }
3717 
3718         if (x86_pmu.version > 1)
3719                 flip_smm_bit(&x86_pmu.attr_freeze_on_smi);
3720 
3721         if (x86_pmu.counter_freezing)
3722                 enable_counter_freeze();
3723 
3724         if (!cpuc->shared_regs)
3725                 return;
3726 
3727         if (!(x86_pmu.flags & PMU_FL_NO_HT_SHARING)) {
3728                 for_each_cpu(i, topology_sibling_cpumask(cpu)) {
3729                         struct intel_shared_regs *pc;
3730 
3731                         pc = per_cpu(cpu_hw_events, i).shared_regs;
3732                         if (pc && pc->core_id == core_id) {
3733                                 cpuc->kfree_on_online[0] = cpuc->shared_regs;
3734                                 cpuc->shared_regs = pc;
3735                                 break;
3736                         }
3737                 }
3738                 cpuc->shared_regs->core_id = core_id;
3739                 cpuc->shared_regs->refcnt++;
3740         }
3741 
3742         if (x86_pmu.lbr_sel_map)
3743                 cpuc->lbr_sel = &cpuc->shared_regs->regs[EXTRA_REG_LBR];
3744 
3745         if (x86_pmu.flags & PMU_FL_EXCL_CNTRS) {
3746                 for_each_cpu(i, topology_sibling_cpumask(cpu)) {
3747                         struct cpu_hw_events *sibling;
3748                         struct intel_excl_cntrs *c;
3749 
3750                         sibling = &per_cpu(cpu_hw_events, i);
3751                         c = sibling->excl_cntrs;
3752                         if (c && c->core_id == core_id) {
3753                                 cpuc->kfree_on_online[1] = cpuc->excl_cntrs;
3754                                 cpuc->excl_cntrs = c;
3755                                 if (!sibling->excl_thread_id)
3756                                         cpuc->excl_thread_id = 1;
3757                                 break;
3758                         }
3759                 }
3760                 cpuc->excl_cntrs->core_id = core_id;
3761                 cpuc->excl_cntrs->refcnt++;
3762         }
3763 }
3764 
3765 static void free_excl_cntrs(struct cpu_hw_events *cpuc)
3766 {
3767         struct intel_excl_cntrs *c;
3768 
3769         c = cpuc->excl_cntrs;
3770         if (c) {
3771                 if (c->core_id == -1 || --c->refcnt == 0)
3772                         kfree(c);
3773                 cpuc->excl_cntrs = NULL;
3774         }
3775 
3776         kfree(cpuc->constraint_list);
3777         cpuc->constraint_list = NULL;
3778 }
3779 
3780 static void intel_pmu_cpu_dying(int cpu)
3781 {
3782         fini_debug_store_on_cpu(cpu);
3783 
3784         if (x86_pmu.counter_freezing)
3785                 disable_counter_freeze();
3786 }
3787 
3788 void intel_cpuc_finish(struct cpu_hw_events *cpuc)
3789 {
3790         struct intel_shared_regs *pc;
3791 
3792         pc = cpuc->shared_regs;
3793         if (pc) {
3794                 if (pc->core_id == -1 || --pc->refcnt == 0)
3795                         kfree(pc);
3796                 cpuc->shared_regs = NULL;
3797         }
3798 
3799         free_excl_cntrs(cpuc);
3800 }
3801 
3802 static void intel_pmu_cpu_dead(int cpu)
3803 {
3804         intel_cpuc_finish(&per_cpu(cpu_hw_events, cpu));
3805 }
3806 
3807 static void intel_pmu_sched_task(struct perf_event_context *ctx,
3808                                  bool sched_in)
3809 {
3810         intel_pmu_pebs_sched_task(ctx, sched_in);
3811         intel_pmu_lbr_sched_task(ctx, sched_in);
3812 }
3813 
3814 static int intel_pmu_check_period(struct perf_event *event, u64 value)
3815 {
3816         return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
3817 }
3818 
3819 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
3820 
3821 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
3822 
3823 PMU_FORMAT_ATTR(frontend, "config1:0-23");
3824 
3825 static struct attribute *intel_arch3_formats_attr[] = {
3826         &format_attr_event.attr,
3827         &format_attr_umask.attr,
3828         &format_attr_edge.attr,
3829         &format_attr_pc.attr,
3830         &format_attr_any.attr,
3831         &format_attr_inv.attr,
3832         &format_attr_cmask.attr,
3833         NULL,
3834 };
3835 
3836 static struct attribute *hsw_format_attr[] = {
3837         &format_attr_in_tx.attr,
3838         &format_attr_in_tx_cp.attr,
3839         &format_attr_offcore_rsp.attr,
3840         &format_attr_ldlat.attr,
3841         NULL
3842 };
3843 
3844 static struct attribute *nhm_format_attr[] = {
3845         &format_attr_offcore_rsp.attr,
3846         &format_attr_ldlat.attr,
3847         NULL
3848 };
3849 
3850 static struct attribute *slm_format_attr[] = {
3851         &format_attr_offcore_rsp.attr,
3852         NULL
3853 };
3854 
3855 static struct attribute *skl_format_attr[] = {
3856         &format_attr_frontend.attr,
3857         NULL,
3858 };
3859 
3860 static __initconst const struct x86_pmu core_pmu = {
3861         .name                   = "core",
3862         .handle_irq             = x86_pmu_handle_irq,
3863         .disable_all            = x86_pmu_disable_all,
3864         .enable_all             = core_pmu_enable_all,
3865         .enable                 = core_pmu_enable_event,
3866         .disable                = x86_pmu_disable_event,
3867         .hw_config              = core_pmu_hw_config,
3868         .schedule_events        = x86_schedule_events,
3869         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
3870         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
3871         .event_map              = intel_pmu_event_map,
3872         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
3873         .apic                   = 1,
3874         .large_pebs_flags       = LARGE_PEBS_FLAGS,
3875 
3876         /*
3877          * Intel PMCs cannot be accessed sanely above 32-bit width,
3878          * so we install an artificial 1<<31 period regardless of
3879          * the generic event period:
3880          */
3881         .max_period             = (1ULL<<31) - 1,
3882         .get_event_constraints  = intel_get_event_constraints,
3883         .put_event_constraints  = intel_put_event_constraints,
3884         .event_constraints      = intel_core_event_constraints,
3885         .guest_get_msrs         = core_guest_get_msrs,
3886         .format_attrs           = intel_arch_formats_attr,
3887         .events_sysfs_show      = intel_event_sysfs_show,
3888 
3889         /*
3890          * A virtual (or funny metal) CPU can define x86_pmu.extra_regs
3891          * together with PMU version 1 and thus end up using core_pmu with
3892          * shared_regs. We need the following callbacks here to allocate
3893          * it properly.
3894          */
3895         .cpu_prepare            = intel_pmu_cpu_prepare,
3896         .cpu_starting           = intel_pmu_cpu_starting,
3897         .cpu_dying              = intel_pmu_cpu_dying,
3898         .cpu_dead               = intel_pmu_cpu_dead,
3899 
3900         .check_period           = intel_pmu_check_period,
3901 };
3902 
3903 static struct attribute *intel_pmu_attrs[];
3904 
3905 static __initconst const struct x86_pmu intel_pmu = {
3906         .name                   = "Intel",
3907         .handle_irq             = intel_pmu_handle_irq,
3908         .disable_all            = intel_pmu_disable_all,
3909         .enable_all             = intel_pmu_enable_all,
3910         .enable                 = intel_pmu_enable_event,
3911         .disable                = intel_pmu_disable_event,
3912         .add                    = intel_pmu_add_event,
3913         .del                    = intel_pmu_del_event,
3914         .read                   = intel_pmu_read_event,
3915         .hw_config              = intel_pmu_hw_config,
3916         .schedule_events        = x86_schedule_events,
3917         .eventsel               = MSR_ARCH_PERFMON_EVENTSEL0,
3918         .perfctr                = MSR_ARCH_PERFMON_PERFCTR0,
3919         .event_map              = intel_pmu_event_map,
3920         .max_events             = ARRAY_SIZE(intel_perfmon_event_map),
3921         .apic                   = 1,
3922         .large_pebs_flags       = LARGE_PEBS_FLAGS,
3923         /*
3924          * Intel PMCs cannot be accessed sanely above 32 bit width,
3925          * so we install an artificial 1<<31 period regardless of
3926          * the generic event period:
3927          */
3928         .max_period             = (1ULL << 31) - 1,
3929         .get_event_constraints  = intel_get_event_constraints,
3930         .put_event_constraints  = intel_put_event_constraints,
3931         .pebs_aliases           = intel_pebs_aliases_core2,
3932 
3933         .format_attrs           = intel_arch3_formats_attr,
3934         .events_sysfs_show      = intel_event_sysfs_show,
3935 
3936         .attrs                  = intel_pmu_attrs,
3937 
3938         .cpu_prepare            = intel_pmu_cpu_prepare,
3939         .cpu_starting           = intel_pmu_cpu_starting,
3940         .cpu_dying              = intel_pmu_cpu_dying,
3941         .cpu_dead               = intel_pmu_cpu_dead,
3942 
3943         .guest_get_msrs         = intel_guest_get_msrs,
3944         .sched_task             = intel_pmu_sched_task,
3945 
3946         .check_period           = intel_pmu_check_period,
3947 };
3948 
3949 static __init void intel_clovertown_quirk(void)
3950 {
3951         /*
3952          * PEBS is unreliable due to:
3953          *
3954          *   AJ67  - PEBS may experience CPL leaks
3955          *   AJ68  - PEBS PMI may be delayed by one event
3956          *   AJ69  - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12]
3957          *   AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS
3958          *
3959          * AJ67 could be worked around by restricting the OS/USR flags.
3960          * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI.
3961          *
3962          * AJ106 could possibly be worked around by not allowing LBR
3963          *       usage from PEBS, including the fixup.
3964          * AJ68  could possibly be worked around by always programming
3965          *       a pebs_event_reset[0] value and coping with the lost events.
3966          *
3967          * But taken together it might just make sense to not enable PEBS on
3968          * these chips.
3969          */
3970         pr_warn("PEBS disabled due to CPU errata\n");
3971         x86_pmu.pebs = 0;
3972         x86_pmu.pebs_constraints = NULL;
3973 }
3974 
3975 static const struct x86_cpu_desc isolation_ucodes[] = {
3976         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_CORE,          3, 0x0000001f),
3977         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_ULT,           1, 0x0000001e),
3978         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_GT3E,          1, 0x00000015),
3979         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             2, 0x00000037),
3980         INTEL_CPU_DESC(INTEL_FAM6_HASWELL_X,             4, 0x0000000a),
3981         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_CORE,        4, 0x00000023),
3982         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_GT3E,        1, 0x00000014),
3983         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      2, 0x00000010),
3984         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      3, 0x07000009),
3985         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      4, 0x0f000009),
3986         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_XEON_D,      5, 0x0e000002),
3987         INTEL_CPU_DESC(INTEL_FAM6_BROADWELL_X,           2, 0x0b000014),
3988         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             3, 0x00000021),
3989         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             4, 0x00000000),
3990         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_MOBILE,        3, 0x0000007c),
3991         INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_DESKTOP,       3, 0x0000007c),
3992         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,      9, 0x0000004e),
3993         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,       9, 0x0000004e),
3994         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      10, 0x0000004e),
3995         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      11, 0x0000004e),
3996         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_MOBILE,      12, 0x0000004e),
3997         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     10, 0x0000004e),
3998         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     11, 0x0000004e),
3999         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     12, 0x0000004e),
4000         INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE_DESKTOP,     13, 0x0000004e),
4001         {}
4002 };
4003 
4004 static void intel_check_pebs_isolation(void)
4005 {
4006         x86_pmu.pebs_no_isolation = !x86_cpu_has_min_microcode_rev(isolation_ucodes);
4007 }
4008 
4009 static __init void intel_pebs_isolation_quirk(void)
4010 {
4011         WARN_ON_ONCE(x86_pmu.check_microcode);
4012         x86_pmu.check_microcode = intel_check_pebs_isolation;
4013         intel_check_pebs_isolation();
4014 }
4015 
4016 static const struct x86_cpu_desc pebs_ucodes[] = {
4017         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE,          7, 0x00000028),
4018         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,        6, 0x00000618),
4019         INTEL_CPU_DESC(INTEL_FAM6_SANDYBRIDGE_X,        7, 0x0000070c),
4020         {}
4021 };
4022 
4023 static bool intel_snb_pebs_broken(void)
4024 {
4025         return !x86_cpu_has_min_microcode_rev(pebs_ucodes);
4026 }
4027 
4028 static void intel_snb_check_microcode(void)
4029 {
4030         if (intel_snb_pebs_broken() == x86_pmu.pebs_broken)
4031                 return;
4032 
4033         /*
4034          * Serialized by the microcode lock.
4035          */
4036         if (x86_pmu.pebs_broken) {
4037                 pr_info("PEBS enabled due to microcode update\n");
4038                 x86_pmu.pebs_broken = 0;
4039         } else {
4040                 pr_info("PEBS disabled due to CPU errata, please upgrade microcode\n");
4041                 x86_pmu.pebs_broken = 1;
4042         }
4043 }
4044 
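/*
 * The LBR_FROM MSRs occupy the contiguous range [lbr_from, lbr_from + lbr_nr);
 * check_msr() below applies the LBR sign-extension quirk to values written to
 * them.
 */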
4045 static bool is_lbr_from(unsigned long msr)
4046 {
4047         unsigned long lbr_from_nr = x86_pmu.lbr_from + x86_pmu.lbr_nr;
4048 
4049         return x86_pmu.lbr_from <= msr && msr < lbr_from_nr;
4050 }
4051 
4052 /*
4053  * Under certain circumstances, accessing certain MSRs may cause a #GP.
4054  * This function tests whether the input MSR can be safely accessed.
4055  */
4056 static bool check_msr(unsigned long msr, u64 mask)
4057 {
4058         u64 val_old, val_new, val_tmp;
4059 
4060         /*
4061          * Disable the check for real HW, so we don't
4062          * mess with potentially enabled registers:
4063          */
4064         if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
4065                 return true;
4066 
4067         /*
4068          * Read the current value, change it and read it back to see if it
4069          * matches, this is needed to detect certain hardware emulators
4070          * (qemu/kvm) that don't trap on the MSR access and always return 0s.
4071          */
4072         if (rdmsrl_safe(msr, &val_old))
4073                 return false;
4074 
4075         /*
4076          * Only change the bits which can be updated by wrmsrl.
4077          */
4078         val_tmp = val_old ^ mask;
4079 
4080         if (is_lbr_from(msr))
4081                 val_tmp = lbr_from_signext_quirk_wr(val_tmp);
4082 
4083         if (wrmsrl_safe(msr, val_tmp) ||
4084             rdmsrl_safe(msr, &val_new))
4085                 return false;
4086 
4087         /*
4088          * Quirk only affects validation in wrmsr(), so wrmsrl()'s value
4089          * should equal rdmsrl()'s even with the quirk.
4090          */
4091         if (val_new != val_tmp)
4092                 return false;
4093 
4094         if (is_lbr_from(msr))
4095                 val_old = lbr_from_signext_quirk_wr(val_old);
4096 
4097         /* At this point the MSR is known to be safely accessible.
4098          * Restore the old value and return.
4099          */
4100         wrmsrl(msr, val_old);
4101 
4102         return true;
4103 }
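
/*
 * Illustrative sketch (hypothetical example_probe_msr(), kept under #if 0):
 * the same read/flip/read-back probe as check_msr() above, shown standalone
 * and without the LBR sign-extension quirk handling.  rdmsrl_safe() and
 * wrmsrl_safe() return non-zero when the access faults.
 */
#if 0
static bool example_probe_msr(unsigned long msr, u64 mask)
{
        u64 old, readback;

        if (rdmsrl_safe(msr, &old))             /* faulted: not accessible */
                return false;

        if (wrmsrl_safe(msr, old ^ mask) ||     /* flip only the masked bits */
            rdmsrl_safe(msr, &readback))
                return false;

        wrmsrl(msr, old);                       /* restore the original value */

        /* Emulators that ignore the write and always return 0s fail here. */
        return readback == (old ^ mask);
}
#endif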
4104 
4105 static __init void intel_sandybridge_quirk(void)
4106 {
4107         x86_pmu.check_microcode = intel_snb_check_microcode;
4108         cpus_read_lock();
4109         intel_snb_check_microcode();
4110         cpus_read_unlock();
4111 }
4112 
4113 static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
4114         { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" },
4115         { PERF_COUNT_HW_INSTRUCTIONS, "instructions" },
4116         { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" },
4117         { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" },
4118         { PERF_COUNT_HW_CACHE_MISSES, "cache misses" },
4119         { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" },
4120         { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" },
4121 };
4122 
4123 static __init void intel_arch_events_quirk(void)
4124 {
4125         int bit;
4126 
4127         /* Disable events that CPUID reports as not present */
4128         for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(intel_arch_events_map)) {
4129                 intel_perfmon_event_map[intel_arch_events_map[bit].id] = 0;
4130                 pr_warn("CPUID marked event: \'%s\' unavailable\n",
4131                         intel_arch_events_map[bit].name);
4132         }
4133 }
4134 
4135 static __init void intel_nehalem_quirk(void)
4136 {
4137         union cpuid10_ebx ebx;
4138 
4139         ebx.full = x86_pmu.events_maskl;
4140         if (ebx.split.no_branch_misses_retired) {
4141                 /*
4142                  * Erratum AAJ80 detected, we work it around by using
4143                  * the BR_MISP_EXEC.ANY event. This will over-count
4144                  * branch-misses, but it's still much better than the
4145                  * architectural event which is often completely bogus:
4146                  */
4147                 intel_perfmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x7f89;
4148                 ebx.split.no_branch_misses_retired = 0;
4149                 x86_pmu.events_maskl = ebx.full;
4150                 pr_info("CPU erratum AAJ80 worked around\n");
4151         }
4152 }
4153 
4154 static const struct x86_cpu_desc counter_freezing_ucodes[] = {
4155         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,         2, 0x0000000e),
4156         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,         9, 0x0000002e),
4157         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT,        10, 0x00000008),
4158         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_X,       1, 0x00000028),
4159         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,    1, 0x00000028),
4160         INTEL_CPU_DESC(INTEL_FAM6_ATOM_GOLDMONT_PLUS,    8, 0x00000006),
4161         {}
4162 };
4163 
4164 static bool intel_counter_freezing_broken(void)
4165 {
4166         return !x86_cpu_has_min_microcode_rev(counter_freezing_ucodes);
4167 }
4168 
4169 static __init void intel_counter_freezing_quirk(void)
4170 {
4171         /* Check if it's already disabled */
4172         if (disable_counter_freezing)
4173                 return;
4174 
4175         /*
4176          * If the system starts with the wrong ucode, leave the
4177          * counter-freezing feature permanently disabled.
4178          */
4179         if (intel_counter_freezing_broken()) {
4180                 pr_info("PMU counter freezing disabled due to CPU errata,"
4181                         "please upgrade microcode\n");
4182                 x86_pmu.counter_freezing = false;
4183                 x86_pmu.handle_irq = intel_pmu_handle_irq;
4184         }
4185 }
4186 
4187 /*
4188  * enable software workaround for errata:
4189  * SNB: BJ122
4190  * IVB: BV98
4191  * HSW: HSD29
4192  *
4193  * This is only needed when HT is enabled. However, detecting whether
4194  * HT is enabled is difficult (it is model specific). So instead, we
4195  * enable the workaround during early boot and verify whether it is
4196  * needed in a later initcall phase, once we have valid topology
4197  * information to check whether HT is actually enabled.
4198  */
4199 static __init void intel_ht_bug(void)
4200 {
4201         x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
4202 
4203         x86_pmu.start_scheduling = intel_start_scheduling;
4204         x86_pmu.commit_scheduling = intel_commit_scheduling;
4205         x86_pmu.stop_scheduling = intel_stop_scheduling;
4206 }
4207 
4208 EVENT_ATTR_STR(mem-loads,       mem_ld_hsw,     "event=0xcd,umask=0x1,ldlat=3");
4209 EVENT_ATTR_STR(mem-stores,      mem_st_hsw,     "event=0xd0,umask=0x82");
4210 
4211 /* Haswell special events */
4212 EVENT_ATTR_STR(tx-start,        tx_start,       "event=0xc9,umask=0x1");
4213 EVENT_ATTR_STR(tx-commit,       tx_commit,      "event=0xc9,umask=0x2");
4214 EVENT_ATTR_STR(tx-abort,        tx_abort,       "event=0xc9,umask=0x4");
4215 EVENT_ATTR_STR(tx-capacity,     tx_capacity,    "event=0x54,umask=0x2");
4216 EVENT_ATTR_STR(tx-conflict,     tx_conflict,    "event=0x54,umask=0x1");
4217 EVENT_ATTR_STR(el-start,        el_start,       "event=0xc8,umask=0x1");
4218 EVENT_ATTR_STR(el-commit,       el_commit,      "event=0xc8,umask=0x2");
4219 EVENT_ATTR_STR(el-abort,        el_abort,       "event=0xc8,umask=0x4");
4220 EVENT_ATTR_STR(el-capacity,     el_capacity,    "event=0x54,umask=0x2");
4221 EVENT_ATTR_STR(el-conflict,     el_conflict,    "event=0x54,umask=0x1");
4222 EVENT_ATTR_STR(cycles-t,        cycles_t,       "event=0x3c,in_tx=1");
4223 EVENT_ATTR_STR(cycles-ct,       cycles_ct,      "event=0x3c,in_tx=1,in_tx_cp=1");
4224 
4225 static struct attribute *hsw_events_attrs[] = {
4226         EVENT_PTR(td_slots_issued),
4227         EVENT_PTR(td_slots_retired),
4228         EVENT_PTR(td_fetch_bubbles),
4229         EVENT_PTR(td_total_slots),
4230         EVENT_PTR(td_total_slots_scale),
4231         EVENT_PTR(td_recovery_bubbles),
4232         EVENT_PTR(td_recovery_bubbles_scale),
4233         NULL
4234 };
4235 
4236 static struct attribute *hsw_mem_events_attrs[] = {
4237         EVENT_PTR(mem_ld_hsw),
4238         EVENT_PTR(mem_st_hsw),
4239         NULL,
4240 };
4241 
4242 static struct attribute *hsw_tsx_events_attrs[] = {
4243         EVENT_PTR(tx_start),
4244         EVENT_PTR(tx_commit),
4245         EVENT_PTR(tx_abort),
4246         EVENT_PTR(tx_capacity),
4247         EVENT_PTR(tx_conflict),
4248         EVENT_PTR(el_start),
4249         EVENT_PTR(el_commit),
4250         EVENT_PTR(el_abort),
4251         EVENT_PTR(el_capacity),
4252         EVENT_PTR(el_conflict),
4253         EVENT_PTR(cycles_t),
4254         EVENT_PTR(cycles_ct),
4255         NULL
4256 };
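
/*
 * Usage note (assuming the PMU is registered as "cpu"): the event strings
 * above are exported under /sys/bus/event_source/devices/cpu/events/, so the
 * TSX/HLE events can be requested symbolically, e.g.:
 *
 *      perf stat -e cpu/tx-start/,cpu/tx-abort/ -a -- sleep 1
 */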
4257 
4258 EVENT_ATTR_STR(tx-capacity-read,  tx_capacity_read,  "event=0x54,umask=0x80");
4259 EVENT_ATTR_STR(tx-capacity-write, tx_capacity_write, "event=0x54,umask=0x2");
4260 EVENT_ATTR_STR(el-capacity-read,  el_capacity_read,  "event=0x54,umask=0x80");
4261 EVENT_ATTR_STR(el-capacity-write, el_capacity_write, "event=0x54,umask=0x2");
4262 
4263 static struct attribute *icl_events_attrs[] = {
4264         EVENT_PTR(mem_ld_hsw),
4265         EVENT_PTR(mem_st_hsw),
4266         NULL,
4267 };
4268 
4269 static struct attribute *icl_tsx_events_attrs[] = {
4270         EVENT_PTR(tx_start),
4271         EVENT_PTR(tx_abort),
4272         EVENT_PTR(tx_commit),
4273         EVENT_PTR(tx_capacity_read),
4274         EVENT_PTR(tx_capacity_write),
4275         EVENT_PTR(tx_conflict),
4276         EVENT_PTR(el_start),
4277         EVENT_PTR(el_abort),
4278         EVENT_PTR(el_commit),
4279         EVENT_PTR(el_capacity_read),
4280         EVENT_PTR(el_capacity_write),
4281         EVENT_PTR(el_conflict),
4282         EVENT_PTR(cycles_t),
4283         EVENT_PTR(cycles_ct),
4284         NULL,
4285 };
4286 
4287 static __init struct attribute **get_icl_events_attrs(void)
4288 {
4289         return boot_cpu_has(X86_FEATURE_RTM) ?
4290                 merge_attr(icl_events_attrs, icl_tsx_events_attrs) :
4291                 icl_events_attrs;
4292 }
4293 
4294 static ssize_t freeze_on_smi_show(struct device *cdev,
4295                                   struct device_attribute *attr,
4296                                   char *buf)
4297 {
4298         return sprintf(buf, "%lu\n", x86_pmu.attr_freeze_on_smi);
4299 }
4300 
4301 static DEFINE_MUTEX(freeze_on_smi_mutex);
4302 
4303 static ssize_t freeze_on_smi_store(struct device *cdev,
4304                                    struct device_attribute *attr,
4305                                    const char *buf, size_t count)
4306 {
4307         unsigned long val;
4308         ssize_t ret;
4309 
4310         ret = kstrtoul(buf, 0, &val);
4311         if (ret)
4312                 return ret;
4313 
4314         if (val > 1)
4315                 return -EINVAL;
4316 
4317         mutex_lock(&freeze_on_smi_mutex);
4318 
4319         if (x86_pmu.attr_freeze_on_smi == val)
4320                 goto done;
4321 
4322         x86_pmu.attr_freeze_on_smi = val;
4323 
4324         get_online_cpus();
4325         on_each_cpu(flip_smm_bit, &val, 1);
4326         put_online_cpus();
4327 done:
4328         mutex_unlock(&freeze_on_smi_mutex);
4329 
4330         return count;
4331 }
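
/*
 * Usage note (sysfs path assumed to be the "cpu" PMU directory):
 *
 *      echo 1 > /sys/devices/cpu/freeze_on_smi
 *
 * sets the DEBUGCTLMSR freeze-in-SMM bit on every online CPU via
 * flip_smm_bit(), so the counters stop counting while the CPU is in SMM.
 */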
4332 
4333 static void update_tfa_sched(void *ignored)
4334 {
4335         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
4336 
4337         /*
4338          * Check whether PMC3 is used and, if so, force a reschedule of
4339          * all event types in all contexts.
4340          */
4341         if (test_bit(3, cpuc->active_mask))
4342                 perf_pmu_resched(x86_get_pmu());
4343 }
4344 
4345 static ssize_t show_sysctl_tfa(struct device *cdev,
4346                               struct device_attribute *attr,
4347                               char *buf)
4348 {
4349         return snprintf(buf, 40, "%d\n", allow_tsx_force_abort);
4350 }
4351 
4352 static ssize_t set_sysctl_tfa(struct device *cdev,
4353                               struct device_attribute *attr,
4354                               const char *buf, size_t count)
4355 {
4356         bool val;
4357         ssize_t ret;
4358 
4359         ret = kstrtobool(buf, &val);
4360         if (ret)
4361                 return ret;
4362 
4363         /* no change */
4364         if (val == allow_tsx_force_abort)
4365                 return count;
4366 
4367         allow_tsx_force_abort = val;
4368 
4369         get_online_cpus();
4370         on_each_cpu(update_tfa_sched, NULL, 1);
4371         put_online_cpus();
4372 
4373         return count;
4374 }
4375 
4376 
4377 static DEVICE_ATTR_RW(freeze_on_smi);
4378 
4379 static ssize_t branches_show(struct device *cdev,
4380                              struct device_attribute *attr,
4381                              char *buf)
4382 {
4383         return snprintf(buf, PAGE_SIZE, "%d\n", x86_pmu.lbr_nr);
4384 }
4385 
4386 static DEVICE_ATTR_RO(branches);
4387 
4388 static struct attribute *lbr_attrs[] = {
4389         &dev_attr_branches.attr,
4390         NULL
4391 };
4392 
4393 static char pmu_name_str[30];
4394 
4395 static ssize_t pmu_name_show(struct device *cdev,
4396                              struct device_attribute *attr,
4397                              char *buf)
4398 {
4399         return snprintf(buf, PAGE_SIZE, "%s\n", pmu_name_str);
4400 }
4401 
4402 static DEVICE_ATTR_RO(pmu_name);
4403 
4404 static struct attribute *intel_pmu_caps_attrs[] = {
4405        &dev_attr_pmu_name.attr,
4406        NULL
4407 };
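
/*
 * Note: these attributes are expected to appear in the PMU's "caps" group,
 * e.g. /sys/devices/cpu/caps/pmu_name (and caps/branches once the LBR
 * attribute is merged in by intel_pmu_init() below).
 */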
4408 
4409 static DEVICE_ATTR(allow_tsx_force_abort, 0644,
4410                    show_sysctl_tfa,
4411                    set_sysctl_tfa);
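
/*
 * Note: allow_tsx_force_abort is only exposed when the TFA erratum applies;
 * intel_pmu_init() below fills the reserved NULL slot of intel_pmu_attrs[]
 * when X86_FEATURE_TSX_FORCE_ABORT is detected.  Writing 0/1 to
 * /sys/devices/cpu/allow_tsx_force_abort (path assumed) then reschedules
 * running events via update_tfa_sched().
 */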
4412 
4413 static struct attribute *intel_pmu_attrs[] = {
4414         &dev_attr_freeze_on_smi.attr,
4415         NULL, /* &dev_attr_allow_tsx_force_abort.attr.attr */
4416         NULL,
4417 };
4418 
4419 static __init struct attribute **
4420 get_events_attrs(struct attribute **base,
4421                  struct attribute **mem,
4422                  struct attribute **tsx)
4423 {
4424         struct attribute **attrs = base;
4425         struct attribute **old;
4426 
4427         if (mem && x86_pmu.pebs)
4428                 attrs = merge_attr(attrs, mem);
4429 
4430         if (tsx && boot_cpu_has(X86_FEATURE_RTM)) {
4431                 old = attrs;
4432                 attrs = merge_attr(attrs, tsx);
4433                 if (old != base)
4434                         kfree(old);
4435         }
4436 
4437         return attrs;
4438 }
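
/*
 * Illustrative sketch (hypothetical example_merge_attr(), kept under #if 0):
 * merge_attr() is assumed to allocate a new NULL-terminated array holding the
 * entries of both inputs, which is why intermediate results other than the
 * original base array are kfree()d above.
 */
#if 0
static struct attribute **example_merge_attr(struct attribute **a,
                                             struct attribute **b)
{
        struct attribute **new;
        int i, j = 0, na = 0, nb = 0;

        while (a[na])                   /* count entries, excluding the NULL */
                na++;
        while (b[nb])
                nb++;

        new = kmalloc_array(na + nb + 1, sizeof(*new), GFP_KERNEL);
        if (!new)
                return NULL;

        for (i = 0; i < na; i++)
                new[j++] = a[i];
        for (i = 0; i < nb; i++)
                new[j++] = b[i];
        new[j] = NULL;                  /* keep the result NULL-terminated */

        return new;
}
#endif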
4439 
4440 __init int intel_pmu_init(void)
4441 {
4442         struct attribute **extra_attr = NULL;
4443         struct attribute **mem_attr = NULL;
4444         struct attribute **tsx_attr = NULL;
4445         struct attribute **to_free = NULL;
4446         union cpuid10_edx edx;
4447         union cpuid10_eax eax;
4448         union cpuid10_ebx ebx;
4449         struct event_constraint *c;
4450         unsigned int unused;
4451         struct extra_reg *er;
4452         bool pmem = false;
4453         int version, i;
4454         char *name;
4455 
4456         if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
4457                 switch (boot_cpu_data.x86) {
4458                 case 0x6:
4459                         return p6_pmu_init();
4460                 case 0xb:
4461                         return knc_pmu_init();
4462                 case 0xf:
4463                         return p4_pmu_init();
4464                 }
4465                 return -ENODEV;
4466         }
4467 
4468         /*
4469          * Check whether the Architectural PerfMon supports
4470          * Branch Misses Retired hw_event or not.
4471          */
4472         cpuid(10, &eax.full, &ebx.full, &unused, &edx.full);
4473         if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT)
4474                 return -ENODEV;
4475 
4476         version = eax.split.version_id;
4477         if (version < 2)
4478                 x86_pmu = core_pmu;
4479         else
4480                 x86_pmu = intel_pmu;
4481 
4482         x86_pmu.version                 = version;
4483         x86_pmu.num_counters            = eax.split.num_counters;
4484         x86_pmu.cntval_bits             = eax.split.bit_width;
4485         x86_pmu.cntval_mask             = (1ULL << eax.split.bit_width) - 1;
4486 
4487         x86_pmu.events_maskl            = ebx.full;
4488         x86_pmu.events_mask_len         = eax.split.mask_length;
4489 
4490         x86_pmu.max_pebs_events         = min_t(unsigned, MAX_PEBS_EVENTS, x86_pmu.num_counters);
4491 
4492         /*
4493          * Quirk: v2 perfmon does not report fixed-purpose events, so
4494          * assume at least 3 events, when not running in a hypervisor:
4495          */
4496         if (version > 1) {
4497                 int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR);
4498 
4499                 x86_pmu.num_counters_fixed =
4500                         max((int)edx.split.num_counters_fixed, assume);
4501         }
4502 
4503         if (version >= 4)
4504                 x86_pmu.counter_freezing = !disable_counter_freezing;
4505 
4506         if (boot_cpu_has(X86_FEATURE_PDCM)) {
4507                 u64 capabilities;
4508 
4509                 rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
4510                 x86_pmu.intel_cap.capabilities = capabilities;
4511         }
4512 
4513         intel_ds_init();
4514 
4515         x86_add_quirk(intel_arch_events_quirk); /* Install first, so it runs last */
4516 
4517         /*
4518          * Install the hw-cache-events table:
4519          */
4520         switch (boot_cpu_data.x86_model) {
4521         case INTEL_FAM6_CORE_YONAH:
4522                 pr_cont("Core events, ");
4523                 name = "core";
4524                 break;
4525 
4526         case INTEL_FAM6_CORE2_MEROM:
4527                 x86_add_quirk(intel_clovertown_quirk);
4528                 /* fall through */
4529 
4530         case INTEL_FAM6_CORE2_MEROM_L:
4531         case INTEL_FAM6_CORE2_PENRYN:
4532         case INTEL_FAM6_CORE2_DUNNINGTON:
4533                 memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
4534                        sizeof(hw_cache_event_ids));
4535 
4536                 intel_pmu_lbr_init_core();
4537 
4538                 x86_pmu.event_constraints = intel_core2_event_constraints;
4539                 x86_pmu.pebs_constraints = intel_core2_pebs_event_constraints;
4540                 pr_cont("Core2 events, ");
4541                 name = "core2";
4542                 break;
4543 
4544         case INTEL_FAM6_NEHALEM:
4545         case INTEL_FAM6_NEHALEM_EP:
4546         case INTEL_FAM6_NEHALEM_EX:
4547                 memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
4548                        sizeof(hw_cache_event_ids));
4549                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
4550                        sizeof(hw_cache_extra_regs));
4551 
4552                 intel_pmu_lbr_init_nhm();
4553 
4554                 x86_pmu.event_constraints = intel_nehalem_event_constraints;
4555                 x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
4556                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
4557                 x86_pmu.extra_regs = intel_nehalem_extra_regs;
4558                 x86_pmu.limit_period = nhm_limit_period;
4559 
4560                 mem_attr = nhm_mem_events_attrs;
4561 
4562                 /* UOPS_ISSUED.STALLED_CYCLES */
4563                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4564                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4565                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
4566                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4567                         X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
4568 
4569                 intel_pmu_pebs_data_source_nhm();
4570                 x86_add_quirk(intel_nehalem_quirk);
4571                 x86_pmu.pebs_no_tlb = 1;
4572                 extra_attr = nhm_format_attr;
4573 
4574                 pr_cont("Nehalem events, ");
4575                 name = "nehalem";
4576                 break;
4577 
4578         case INTEL_FAM6_ATOM_BONNELL:
4579         case INTEL_FAM6_ATOM_BONNELL_MID:
4580         case INTEL_FAM6_ATOM_SALTWELL:
4581         case INTEL_FAM6_ATOM_SALTWELL_MID:
4582         case INTEL_FAM6_ATOM_SALTWELL_TABLET:
4583                 memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
4584                        sizeof(hw_cache_event_ids));
4585 
4586                 intel_pmu_lbr_init_atom();
4587 
4588                 x86_pmu.event_constraints = intel_gen_event_constraints;
4589                 x86_pmu.pebs_constraints = intel_atom_pebs_event_constraints;
4590                 x86_pmu.pebs_aliases = intel_pebs_aliases_core2;
4591                 pr_cont("Atom events, ");
4592                 name = "bonnell";
4593                 break;
4594 
4595         case INTEL_FAM6_ATOM_SILVERMONT:
4596         case INTEL_FAM6_ATOM_SILVERMONT_X:
4597         case INTEL_FAM6_ATOM_SILVERMONT_MID:
4598         case INTEL_FAM6_ATOM_AIRMONT:
4599         case INTEL_FAM6_ATOM_AIRMONT_MID:
4600                 memcpy(hw_cache_event_ids, slm_hw_cache_event_ids,
4601                         sizeof(hw_cache_event_ids));
4602                 memcpy(hw_cache_extra_regs, slm_hw_cache_extra_regs,
4603                        sizeof(hw_cache_extra_regs));
4604 
4605                 intel_pmu_lbr_init_slm();
4606 
4607                 x86_pmu.event_constraints = intel_slm_event_constraints;
4608                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
4609                 x86_pmu.extra_regs = intel_slm_extra_regs;
4610                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4611                 x86_pmu.cpu_events = slm_events_attrs;
4612                 extra_attr = slm_format_attr;
4613                 pr_cont("Silvermont events, ");
4614                 name = "silvermont";
4615                 break;
4616 
4617         case INTEL_FAM6_ATOM_GOLDMONT:
4618         case INTEL_FAM6_ATOM_GOLDMONT_X:
4619                 x86_add_quirk(intel_counter_freezing_quirk);
4620                 memcpy(hw_cache_event_ids, glm_hw_cache_event_ids,
4621                        sizeof(hw_cache_event_ids));
4622                 memcpy(hw_cache_extra_regs, glm_hw_cache_extra_regs,
4623                        sizeof(hw_cache_extra_regs));
4624 
4625                 intel_pmu_lbr_init_skl();
4626 
4627                 x86_pmu.event_constraints = intel_slm_event_constraints;
4628                 x86_pmu.pebs_constraints = intel_glm_pebs_event_constraints;
4629                 x86_pmu.extra_regs = intel_glm_extra_regs;
4630                 /*
4631                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4632                  * for precise cycles.
4633                  * :pp is identical to :ppp
4634                  */
4635                 x86_pmu.pebs_aliases = NULL;
4636                 x86_pmu.pebs_prec_dist = true;
4637                 x86_pmu.lbr_pt_coexist = true;
4638                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4639                 x86_pmu.cpu_events = glm_events_attrs;
4640                 extra_attr = slm_format_attr;
4641                 pr_cont("Goldmont events, ");
4642                 name = "goldmont";
4643                 break;
4644 
4645         case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
4646                 x86_add_quirk(intel_counter_freezing_quirk);
4647                 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
4648                        sizeof(hw_cache_event_ids));
4649                 memcpy(hw_cache_extra_regs, glp_hw_cache_extra_regs,
4650                        sizeof(hw_cache_extra_regs));
4651 
4652                 intel_pmu_lbr_init_skl();
4653 
4654                 x86_pmu.event_constraints = intel_slm_event_constraints;
4655                 x86_pmu.extra_regs = intel_glm_extra_regs;
4656                 /*
4657                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4658                  * for precise cycles.
4659                  */
4660                 x86_pmu.pebs_aliases = NULL;
4661                 x86_pmu.pebs_prec_dist = true;
4662                 x86_pmu.lbr_pt_coexist = true;
4663                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4664                 x86_pmu.flags |= PMU_FL_PEBS_ALL;
4665                 x86_pmu.get_event_constraints = glp_get_event_constraints;
4666                 x86_pmu.cpu_events = glm_events_attrs;
4667                 /* Goldmont Plus has 4-wide pipeline */
4668                 event_attr_td_total_slots_scale_glm.event_str = "4";
4669                 extra_attr = slm_format_attr;
4670                 pr_cont("Goldmont plus events, ");
4671                 name = "goldmont_plus";
4672                 break;
4673 
4674         case INTEL_FAM6_ATOM_TREMONT_X:
4675                 x86_pmu.late_ack = true;
4676                 memcpy(hw_cache_event_ids, glp_hw_cache_event_ids,
4677                        sizeof(hw_cache_event_ids));
4678                 memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs,
4679                        sizeof(hw_cache_extra_regs));
4680                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
4681 
4682                 intel_pmu_lbr_init_skl();
4683 
4684                 x86_pmu.event_constraints = intel_slm_event_constraints;
4685                 x86_pmu.extra_regs = intel_tnt_extra_regs;
4686                 /*
4687                  * It's recommended to use CPU_CLK_UNHALTED.CORE_P + NPEBS
4688                  * for precise cycles.
4689                  */
4690                 x86_pmu.pebs_aliases = NULL;
4691                 x86_pmu.pebs_prec_dist = true;
4692                 x86_pmu.lbr_pt_coexist = true;
4693                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4694                 x86_pmu.get_event_constraints = tnt_get_event_constraints;
4695                 extra_attr = slm_format_attr;
4696                 pr_cont("Tremont events, ");
4697                 name = "Tremont";
4698                 break;
4699 
4700         case INTEL_FAM6_WESTMERE:
4701         case INTEL_FAM6_WESTMERE_EP:
4702         case INTEL_FAM6_WESTMERE_EX:
4703                 memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
4704                        sizeof(hw_cache_event_ids));
4705                 memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
4706                        sizeof(hw_cache_extra_regs));
4707 
4708                 intel_pmu_lbr_init_nhm();
4709 
4710                 x86_pmu.event_constraints = intel_westmere_event_constraints;
4711                 x86_pmu.enable_all = intel_pmu_nhm_enable_all;
4712                 x86_pmu.pebs_constraints = intel_westmere_pebs_event_constraints;
4713                 x86_pmu.extra_regs = intel_westmere_extra_regs;
4714                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4715 
4716                 mem_attr = nhm_mem_events_attrs;
4717 
4718                 /* UOPS_ISSUED.STALLED_CYCLES */
4719                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4720                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4721                 /* UOPS_EXECUTED.CORE_ACTIVE_CYCLES,c=1,i=1 */
4722                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4723                         X86_CONFIG(.event=0xb1, .umask=0x3f, .inv=1, .cmask=1);
4724 
4725                 intel_pmu_pebs_data_source_nhm();
4726                 extra_attr = nhm_format_attr;
4727                 pr_cont("Westmere events, ");
4728                 name = "westmere";
4729                 break;
4730 
4731         case INTEL_FAM6_SANDYBRIDGE:
4732         case INTEL_FAM6_SANDYBRIDGE_X:
4733                 x86_add_quirk(intel_sandybridge_quirk);
4734                 x86_add_quirk(intel_ht_bug);
4735                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
4736                        sizeof(hw_cache_event_ids));
4737                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
4738                        sizeof(hw_cache_extra_regs));
4739 
4740                 intel_pmu_lbr_init_snb();
4741 
4742                 x86_pmu.event_constraints = intel_snb_event_constraints;
4743                 x86_pmu.pebs_constraints = intel_snb_pebs_event_constraints;
4744                 x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
4745                 if (boot_cpu_data.x86_model == INTEL_FAM6_SANDYBRIDGE_X)
4746                         x86_pmu.extra_regs = intel_snbep_extra_regs;
4747                 else
4748                         x86_pmu.extra_regs = intel_snb_extra_regs;
4749 
4750 
4751                 /* all extra regs are per-cpu when HT is on */
4752                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4753                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4754 
4755                 x86_pmu.cpu_events = snb_events_attrs;
4756                 mem_attr = snb_mem_events_attrs;
4757 
4758                 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
4759                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4760                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4761                 /* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
4762                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] =
4763                         X86_CONFIG(.event=0xb1, .umask=0x01, .inv=1, .cmask=1);
4764 
4765                 extra_attr = nhm_format_attr;
4766 
4767                 pr_cont("SandyBridge events, ");
4768                 name = "sandybridge";
4769                 break;
4770 
4771         case INTEL_FAM6_IVYBRIDGE:
4772         case INTEL_FAM6_IVYBRIDGE_X:
4773                 x86_add_quirk(intel_ht_bug);
4774                 memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
4775                        sizeof(hw_cache_event_ids));
4776                 /* dTLB-load-misses on IVB is different than SNB */
4777                 hw_cache_event_ids[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = 0x8108; /* DTLB_LOAD_MISSES.DEMAND_LD_MISS_CAUSES_A_WALK */
4778 
4779                 memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
4780                        sizeof(hw_cache_extra_regs));
4781 
4782                 intel_pmu_lbr_init_snb();
4783 
4784                 x86_pmu.event_constraints = intel_ivb_event_constraints;
4785                 x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
4786                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4787                 x86_pmu.pebs_prec_dist = true;
4788                 if (boot_cpu_data.x86_model == INTEL_FAM6_IVYBRIDGE_X)
4789                         x86_pmu.extra_regs = intel_snbep_extra_regs;
4790                 else
4791                         x86_pmu.extra_regs = intel_snb_extra_regs;
4792                 /* all extra regs are per-cpu when HT is on */
4793                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4794                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4795 
4796                 x86_pmu.cpu_events = snb_events_attrs;
4797                 mem_attr = snb_mem_events_attrs;
4798 
4799                 /* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
4800                 intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
4801                         X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
4802 
4803                 extra_attr = nhm_format_attr;
4804 
4805                 pr_cont("IvyBridge events, ");
4806                 name = "ivybridge";
4807                 break;
4808 
4809 
4810         case INTEL_FAM6_HASWELL_CORE:
4811         case INTEL_FAM6_HASWELL_X:
4812         case INTEL_FAM6_HASWELL_ULT:
4813         case INTEL_FAM6_HASWELL_GT3E:
4814                 x86_add_quirk(intel_ht_bug);
4815                 x86_add_quirk(intel_pebs_isolation_quirk);
4816                 x86_pmu.late_ack = true;
4817                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4818                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4819 
4820                 intel_pmu_lbr_init_hsw();
4821 
4822                 x86_pmu.event_constraints = intel_hsw_event_constraints;
4823                 x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
4824                 x86_pmu.extra_regs = intel_snbep_extra_regs;
4825                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4826                 x86_pmu.pebs_prec_dist = true;
4827                 /* all extra regs are per-cpu when HT is on */
4828                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4829                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4830 
4831                 x86_pmu.hw_config = hsw_hw_config;
4832                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
4833                 x86_pmu.cpu_events = hsw_events_attrs;
4834                 x86_pmu.lbr_double_abort = true;
4835                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4836                         hsw_format_attr : nhm_format_attr;
4837                 mem_attr = hsw_mem_events_attrs;
4838                 tsx_attr = hsw_tsx_events_attrs;
4839                 pr_cont("Haswell events, ");
4840                 name = "haswell";
4841                 break;
4842 
4843         case INTEL_FAM6_BROADWELL_CORE:
4844         case INTEL_FAM6_BROADWELL_XEON_D:
4845         case INTEL_FAM6_BROADWELL_GT3E:
4846         case INTEL_FAM6_BROADWELL_X:
4847                 x86_add_quirk(intel_pebs_isolation_quirk);
4848                 x86_pmu.late_ack = true;
4849                 memcpy(hw_cache_event_ids, hsw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4850                 memcpy(hw_cache_extra_regs, hsw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4851 
4852                 /* L3_MISS_LOCAL_DRAM is BIT(26) in Broadwell */
4853                 hw_cache_extra_regs[C(LL)][C(OP_READ)][C(RESULT_MISS)] = HSW_DEMAND_READ |
4854                                                                          BDW_L3_MISS|HSW_SNOOP_DRAM;
4855                 hw_cache_extra_regs[C(LL)][C(OP_WRITE)][C(RESULT_MISS)] = HSW_DEMAND_WRITE|BDW_L3_MISS|
4856                                                                           HSW_SNOOP_DRAM;
4857                 hw_cache_extra_regs[C(NODE)][C(OP_READ)][C(RESULT_ACCESS)] = HSW_DEMAND_READ|
4858                                                                              BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
4859                 hw_cache_extra_regs[C(NODE)][C(OP_WRITE)][C(RESULT_ACCESS)] = HSW_DEMAND_WRITE|
4860                                                                               BDW_L3_MISS_LOCAL|HSW_SNOOP_DRAM;
4861 
4862                 intel_pmu_lbr_init_hsw();
4863 
4864                 x86_pmu.event_constraints = intel_bdw_event_constraints;
4865                 x86_pmu.pebs_constraints = intel_bdw_pebs_event_constraints;
4866                 x86_pmu.extra_regs = intel_snbep_extra_regs;
4867                 x86_pmu.pebs_aliases = intel_pebs_aliases_ivb;
4868                 x86_pmu.pebs_prec_dist = true;
4869                 /* all extra regs are per-cpu when HT is on */
4870                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4871                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4872 
4873                 x86_pmu.hw_config = hsw_hw_config;
4874                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
4875                 x86_pmu.cpu_events = hsw_events_attrs;
4876                 x86_pmu.limit_period = bdw_limit_period;
4877                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4878                         hsw_format_attr : nhm_format_attr;
4879                 mem_attr = hsw_mem_events_attrs;
4880                 tsx_attr = hsw_tsx_events_attrs;
4881                 pr_cont("Broadwell events, ");
4882                 name = "broadwell";
4883                 break;
4884 
4885         case INTEL_FAM6_XEON_PHI_KNL:
4886         case INTEL_FAM6_XEON_PHI_KNM:
4887                 memcpy(hw_cache_event_ids,
4888                        slm_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4889                 memcpy(hw_cache_extra_regs,
4890                        knl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4891                 intel_pmu_lbr_init_knl();
4892 
4893                 x86_pmu.event_constraints = intel_slm_event_constraints;
4894                 x86_pmu.pebs_constraints = intel_slm_pebs_event_constraints;
4895                 x86_pmu.extra_regs = intel_knl_extra_regs;
4896 
4897                 /* all extra regs are per-cpu when HT is on */
4898                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4899                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4900                 extra_attr = slm_format_attr;
4901                 pr_cont("Knights Landing/Mill events, ");
4902                 name = "knights-landing";
4903                 break;
4904 
4905         case INTEL_FAM6_SKYLAKE_X:
4906                 pmem = true;
4907         case INTEL_FAM6_SKYLAKE_MOBILE:
4908         case INTEL_FAM6_SKYLAKE_DESKTOP:
4909         case INTEL_FAM6_KABYLAKE_MOBILE:
4910         case INTEL_FAM6_KABYLAKE_DESKTOP:
4911                 x86_add_quirk(intel_pebs_isolation_quirk);
4912                 x86_pmu.late_ack = true;
4913                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4914                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4915                 intel_pmu_lbr_init_skl();
4916 
4917                 /* INT_MISC.RECOVERY_CYCLES has umask 1 in Skylake */
4918                 event_attr_td_recovery_bubbles.event_str_noht =
4919                         "event=0xd,umask=0x1,cmask=1";
4920                 event_attr_td_recovery_bubbles.event_str_ht =
4921                         "event=0xd,umask=0x1,cmask=1,any=1";
4922 
4923                 x86_pmu.event_constraints = intel_skl_event_constraints;
4924                 x86_pmu.pebs_constraints = intel_skl_pebs_event_constraints;
4925                 x86_pmu.extra_regs = intel_skl_extra_regs;
4926                 x86_pmu.pebs_aliases = intel_pebs_aliases_skl;
4927                 x86_pmu.pebs_prec_dist = true;
4928                 /* all extra regs are per-cpu when HT is on */
4929                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4930                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4931 
4932                 x86_pmu.hw_config = hsw_hw_config;
4933                 x86_pmu.get_event_constraints = hsw_get_event_constraints;
4934                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4935                         hsw_format_attr : nhm_format_attr;
4936                 extra_attr = merge_attr(extra_attr, skl_format_attr);
4937                 to_free = extra_attr;
4938                 x86_pmu.cpu_events = hsw_events_attrs;
4939                 mem_attr = hsw_mem_events_attrs;
4940                 tsx_attr = hsw_tsx_events_attrs;
4941                 intel_pmu_pebs_data_source_skl(pmem);
4942 
4943                 if (boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT)) {
4944                         x86_pmu.flags |= PMU_FL_TFA;
4945                         x86_pmu.get_event_constraints = tfa_get_event_constraints;
4946                         x86_pmu.enable_all = intel_tfa_pmu_enable_all;
4947                         x86_pmu.commit_scheduling = intel_tfa_commit_scheduling;
4948                         intel_pmu_attrs[1] = &dev_attr_allow_tsx_force_abort.attr;
4949                 }
4950 
4951                 pr_cont("Skylake events, ");
4952                 name = "skylake";
4953                 break;
4954 
4955         case INTEL_FAM6_ICELAKE_X:
4956         case INTEL_FAM6_ICELAKE_XEON_D:
4957                 pmem = true;
4958         case INTEL_FAM6_ICELAKE_MOBILE:
4959         case INTEL_FAM6_ICELAKE_DESKTOP:
4960                 x86_pmu.late_ack = true;
4961                 memcpy(hw_cache_event_ids, skl_hw_cache_event_ids, sizeof(hw_cache_event_ids));
4962                 memcpy(hw_cache_extra_regs, skl_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
4963                 hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1;
4964                 intel_pmu_lbr_init_skl();
4965 
4966                 x86_pmu.event_constraints = intel_icl_event_constraints;
4967                 x86_pmu.pebs_constraints = intel_icl_pebs_event_constraints;
4968                 x86_pmu.extra_regs = intel_icl_extra_regs;
4969                 x86_pmu.pebs_aliases = NULL;
4970                 x86_pmu.pebs_prec_dist = true;
4971                 x86_pmu.flags |= PMU_FL_HAS_RSP_1;
4972                 x86_pmu.flags |= PMU_FL_NO_HT_SHARING;
4973 
4974                 x86_pmu.hw_config = hsw_hw_config;
4975                 x86_pmu.get_event_constraints = icl_get_event_constraints;
4976                 extra_attr = boot_cpu_has(X86_FEATURE_RTM) ?
4977                         hsw_format_attr : nhm_format_attr;
4978                 extra_attr = merge_attr(extra_attr, skl_format_attr);
4979                 x86_pmu.cpu_events = get_icl_events_attrs();
4980                 x86_pmu.rtm_abort_event = X86_CONFIG(.event=0xca, .umask=0x02);
4981                 x86_pmu.lbr_pt_coexist = true;
4982                 intel_pmu_pebs_data_source_skl(pmem);
4983                 pr_cont("Icelake events, ");
4984                 name = "icelake";
4985                 break;
4986 
4987         default:
4988                 switch (x86_pmu.version) {
4989                 case 1:
4990                         x86_pmu.event_constraints = intel_v1_event_constraints;
4991                         pr_cont("generic architected perfmon v1, ");
4992                         name = "generic_arch_v1";
4993                         break;
4994                 default:
4995                         /*
4996                          * default constraints for v2 and up
4997                          */
4998                         x86_pmu.event_constraints = intel_gen_event_constraints;
4999                         pr_cont("generic architected perfmon, ");
5000                         name = "generic_arch_v2+";
5001                         break;
5002                 }
5003         }
5004 
5005         snprintf(pmu_name_str, sizeof(pmu_name_str), "%s", name);
5006 
5007         if (version >= 2 && extra_attr) {
5008                 x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
5009                                                   extra_attr);
5010                 WARN_ON(!x86_pmu.format_attrs);
5011         }
5012 
5013         x86_pmu.cpu_events = get_events_attrs(x86_pmu.cpu_events,
5014                                               mem_attr, tsx_attr);
5015 
5016         if (x86_pmu.num_counters > INTEL_PMC_MAX_GENERIC) {
5017                 WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!",
5018                      x86_pmu.num_counters, INTEL_PMC_MAX_GENERIC);
5019                 x86_pmu.num_counters = INTEL_PMC_MAX_GENERIC;
5020         }
5021         x86_pmu.intel_ctrl = (1ULL << x86_pmu.num_counters) - 1;
5022 
5023         if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) {
5024                 WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!",
5025                      x86_pmu.num_counters_fixed, INTEL_PMC_MAX_FIXED);
5026                 x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED;
5027         }
5028 
5029         x86_pmu.intel_ctrl |=
5030                 ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED;
5031 
5032         if (x86_pmu.event_constraints) {
5033                 /*
5034                  * event on fixed counter2 (REF_CYCLES) only works on this
5035                  * counter, so do not extend mask to generic counters
5036                  */
5037                 for_each_event_constraint(c, x86_pmu.event_constraints) {
5038                         if (c->cmask == FIXED_EVENT_FLAGS
5039                             && c->idxmsk64 != INTEL_PMC_MSK_FIXED_REF_CYCLES) {
5040                                 c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1;
5041                         }
5042                         c->idxmsk64 &=
5043                                 ~(~0ULL << (INTEL_PMC_IDX_FIXED + x86_pmu.num_counters_fixed));
5044                         c->weight = hweight64(c->idxmsk64);
5045                 }
5046         }
5047 
5048         /*
5049          * Accessing LBR MSRs may cause a #GP fault under certain
5050          * circumstances, e.g. KVM doesn't support LBR MSRs.
5051          * Check all LBR MSRs here.
5052          * Disable LBR access if any LBR MSR cannot be accessed.
5053          */
5054         if (x86_pmu.lbr_nr && !check_msr(x86_pmu.lbr_tos, 0x3UL))
5055                 x86_pmu.lbr_nr = 0;
5056         for (i = 0; i < x86_pmu.lbr_nr; i++) {
5057                 if (!(check_msr(x86_pmu.lbr_from + i, 0xffffUL) &&
5058                       check_msr(x86_pmu.lbr_to + i, 0xffffUL)))
5059                         x86_pmu.lbr_nr = 0;
5060         }
5061 
5062         x86_pmu.caps_attrs = intel_pmu_caps_attrs;
5063 
5064         if (x86_pmu.lbr_nr) {
5065                 x86_pmu.caps_attrs = merge_attr(x86_pmu.caps_attrs, lbr_attrs);
5066                 pr_cont("%d-deep LBR, ", x86_pmu.lbr_nr);
5067         }
5068 
5069         /*
5070          * Accessing extra MSRs may cause a #GP fault under certain
5071          * circumstances, e.g. KVM doesn't support offcore events.
5072          * Check all extra_regs here.
5073          */
5074         if (x86_pmu.extra_regs) {
5075                 for (er = x86_pmu.extra_regs; er->msr; er++) {
5076                         er->extra_msr_access = check_msr(er->msr, 0x11UL);
5077                         /* Disable LBR select mapping */
5078                         if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
5079                                 x86_pmu.lbr_sel_map = NULL;
5080                 }
5081         }
5082 
5083         /* Support full width counters using alternative MSR range */
5084         if (x86_pmu.intel_cap.full_width_write) {
5085                 x86_pmu.max_period = x86_pmu.cntval_mask >> 1;
5086                 x86_pmu.perfctr = MSR_IA32_PMC0;
5087                 pr_cont("full-width counters, ");
5088         }
5089 
5090         /*
5091          * For arch perfmon v4, use counter freezing to avoid
5092          * several MSR accesses in the PMI.
5093          */
5094         if (x86_pmu.counter_freezing)
5095                 x86_pmu.handle_irq = intel_pmu_handle_irq_v4;
5096 
5097         kfree(to_free);
5098         return 0;
5099 }
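
/*
 * Illustrative user-space sketch (kept under #if 0, not kernel code): decoding
 * the same CPUID leaf 0xA fields that intel_pmu_init() reads above.  The bit
 * layout matches the architectural perfmon definition behind the
 * cpuid10_eax/cpuid10_edx unions.
 */
#if 0
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;

        if (!__get_cpuid(0xa, &eax, &ebx, &ecx, &edx))
                return 1;

        printf("version id      : %u\n", eax & 0xff);
        printf("gp counters     : %u\n", (eax >> 8) & 0xff);
        printf("gp counter bits : %u\n", (eax >> 16) & 0xff);
        printf("event mask len  : %u\n", (eax >> 24) & 0xff);
        printf("fixed counters  : %u\n", edx & 0x1f);

        return 0;
}
#endif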
5100 
5101 /*
5102  * HT bug: phase 2 init
5103  * Called once we have valid topology information to check
5104  * whether or not HT is enabled
5105  * If HT is off, then we disable the workaround
5106  */
5107 static __init int fixup_ht_bug(void)
5108 {
5109         int c;
5110         /*
5111          * problem not present on this CPU model, nothing to do
5112          */
5113         if (!(x86_pmu.flags & PMU_FL_EXCL_ENABLED))
5114                 return 0;
5115 
5116         if (topology_max_smt_threads() > 1) {
5117                 pr_info("PMU erratum BJ122, BV98, HSD29 worked around, HT is on\n");
5118                 return 0;
5119         }
5120 
5121         cpus_read_lock();
5122 
5123         hardlockup_detector_perf_stop();
5124 
5125         x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
5126 
5127         x86_pmu.start_scheduling = NULL;
5128         x86_pmu.commit_scheduling = NULL;
5129         x86_pmu.stop_scheduling = NULL;
5130 
5131         hardlockup_detector_perf_restart();
5132 
5133         for_each_online_cpu(c)
5134                 free_excl_cntrs(&per_cpu(cpu_hw_events, c));
5135 
5136         cpus_read_unlock();
5137         pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
5138         return 0;
5139 }
5140 subsys_initcall(fixup_ht_bug)
5141 
