~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

TOMOYO Linux Cross Reference
Linux/arch/x86/events/intel/bts.c

Version: ~ [ linux-5.3 ] ~ [ linux-5.2.15 ] ~ [ linux-5.1.21 ] ~ [ linux-5.0.21 ] ~ [ linux-4.20.17 ] ~ [ linux-4.19.73 ] ~ [ linux-4.18.20 ] ~ [ linux-4.17.19 ] ~ [ linux-4.16.18 ] ~ [ linux-4.15.18 ] ~ [ linux-4.14.144 ] ~ [ linux-4.13.16 ] ~ [ linux-4.12.14 ] ~ [ linux-4.11.12 ] ~ [ linux-4.10.17 ] ~ [ linux-4.9.193 ] ~ [ linux-4.8.17 ] ~ [ linux-4.7.10 ] ~ [ linux-4.6.7 ] ~ [ linux-4.5.7 ] ~ [ linux-4.4.193 ] ~ [ linux-4.3.6 ] ~ [ linux-4.2.8 ] ~ [ linux-4.1.52 ] ~ [ linux-4.0.9 ] ~ [ linux-3.19.8 ] ~ [ linux-3.18.140 ] ~ [ linux-3.17.8 ] ~ [ linux-3.16.73 ] ~ [ linux-3.15.10 ] ~ [ linux-3.14.79 ] ~ [ linux-3.13.11 ] ~ [ linux-3.12.74 ] ~ [ linux-3.11.10 ] ~ [ linux-3.10.108 ] ~ [ linux-3.9.11 ] ~ [ linux-3.8.13 ] ~ [ linux-3.7.10 ] ~ [ linux-3.6.11 ] ~ [ linux-3.5.7 ] ~ [ linux-3.4.113 ] ~ [ linux-3.3.8 ] ~ [ linux-3.2.102 ] ~ [ linux-3.1.10 ] ~ [ linux-3.0.101 ] ~ [ linux-2.6.32.71 ] ~ [ linux-2.6.0 ] ~ [ linux-2.4.37.11 ] ~ [ unix-v6-master ] ~ [ ccs-tools-1.8.5 ] ~ [ policy-sample ] ~
Architecture: ~ [ i386 ] ~ [ alpha ] ~ [ m68k ] ~ [ mips ] ~ [ ppc ] ~ [ sparc ] ~ [ sparc64 ] ~

  1 /*
  2  * BTS PMU driver for perf
  3  * Copyright (c) 2013-2014, Intel Corporation.
  4  *
  5  * This program is free software; you can redistribute it and/or modify it
  6  * under the terms and conditions of the GNU General Public License,
  7  * version 2, as published by the Free Software Foundation.
  8  *
  9  * This program is distributed in the hope it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 12  * more details.
 13  */
 14 
 15 #undef DEBUG
 16 
 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 18 
 19 #include <linux/bitops.h>
 20 #include <linux/types.h>
 21 #include <linux/slab.h>
 22 #include <linux/debugfs.h>
 23 #include <linux/device.h>
 24 #include <linux/coredump.h>
 25 
 26 #include <asm-generic/sizes.h>
 27 #include <asm/perf_event.h>
 28 
 29 #include "../perf_event.h"
 30 
 31 struct bts_ctx {
 32         struct perf_output_handle       handle;
 33         struct debug_store              ds_back;
 34         int                             state;
 35 };
 36 
 37 /* BTS context states: */
 38 enum {
 39         /* no ongoing AUX transactions */
 40         BTS_STATE_STOPPED = 0,
 41         /* AUX transaction is on, BTS tracing is disabled */
 42         BTS_STATE_INACTIVE,
 43         /* AUX transaction is on, BTS tracing is running */
 44         BTS_STATE_ACTIVE,
 45 };
 46 
 47 static DEFINE_PER_CPU(struct bts_ctx, bts_ctx);
 48 
 49 #define BTS_RECORD_SIZE         24
 50 #define BTS_SAFETY_MARGIN       4080
 51 
 52 struct bts_phys {
 53         struct page     *page;
 54         unsigned long   size;
 55         unsigned long   offset;
 56         unsigned long   displacement;
 57 };
 58 
 59 struct bts_buffer {
 60         size_t          real_size;      /* multiple of BTS_RECORD_SIZE */
 61         unsigned int    nr_pages;
 62         unsigned int    nr_bufs;
 63         unsigned int    cur_buf;
 64         bool            snapshot;
 65         local_t         data_size;
 66         local_t         lost;
 67         local_t         head;
 68         unsigned long   end;
 69         void            **data_pages;
 70         struct bts_phys buf[0];
 71 };
 72 
 73 struct pmu bts_pmu;
 74 
 75 static size_t buf_size(struct page *page)
 76 {
 77         return 1 << (PAGE_SHIFT + page_private(page));
 78 }
 79 
 80 static void *
 81 bts_buffer_setup_aux(int cpu, void **pages, int nr_pages, bool overwrite)
 82 {
 83         struct bts_buffer *buf;
 84         struct page *page;
 85         int node = (cpu == -1) ? cpu : cpu_to_node(cpu);
 86         unsigned long offset;
 87         size_t size = nr_pages << PAGE_SHIFT;
 88         int pg, nbuf, pad;
 89 
 90         /* count all the high order buffers */
 91         for (pg = 0, nbuf = 0; pg < nr_pages;) {
 92                 page = virt_to_page(pages[pg]);
 93                 if (WARN_ON_ONCE(!PagePrivate(page) && nr_pages > 1))
 94                         return NULL;
 95                 pg += 1 << page_private(page);
 96                 nbuf++;
 97         }
 98 
 99         /*
100          * to avoid interrupts in overwrite mode, only allow one physical
101          */
102         if (overwrite && nbuf > 1)
103                 return NULL;
104 
105         buf = kzalloc_node(offsetof(struct bts_buffer, buf[nbuf]), GFP_KERNEL, node);
106         if (!buf)
107                 return NULL;
108 
109         buf->nr_pages = nr_pages;
110         buf->nr_bufs = nbuf;
111         buf->snapshot = overwrite;
112         buf->data_pages = pages;
113         buf->real_size = size - size % BTS_RECORD_SIZE;
114 
115         for (pg = 0, nbuf = 0, offset = 0, pad = 0; nbuf < buf->nr_bufs; nbuf++) {
116                 unsigned int __nr_pages;
117 
118                 page = virt_to_page(pages[pg]);
119                 __nr_pages = PagePrivate(page) ? 1 << page_private(page) : 1;
120                 buf->buf[nbuf].page = page;
121                 buf->buf[nbuf].offset = offset;
122                 buf->buf[nbuf].displacement = (pad ? BTS_RECORD_SIZE - pad : 0);
123                 buf->buf[nbuf].size = buf_size(page) - buf->buf[nbuf].displacement;
124                 pad = buf->buf[nbuf].size % BTS_RECORD_SIZE;
125                 buf->buf[nbuf].size -= pad;
126 
127                 pg += __nr_pages;
128                 offset += __nr_pages << PAGE_SHIFT;
129         }
130 
131         return buf;
132 }
133 
/*
 * Release the struct bts_buffer created by bts_buffer_setup_aux().
 * @data: pointer previously returned by bts_buffer_setup_aux()
 */
static void bts_buffer_free_aux(void *data)
{
	kfree(data);
}
138 
139 static unsigned long bts_buffer_offset(struct bts_buffer *buf, unsigned int idx)
140 {
141         return buf->buf[idx].offset + buf->buf[idx].displacement;
142 }
143 
144 static void
145 bts_config_buffer(struct bts_buffer *buf)
146 {
147         int cpu = raw_smp_processor_id();
148         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
149         struct bts_phys *phys = &buf->buf[buf->cur_buf];
150         unsigned long index, thresh = 0, end = phys->size;
151         struct page *page = phys->page;
152 
153         index = local_read(&buf->head);
154 
155         if (!buf->snapshot) {
156                 if (buf->end < phys->offset + buf_size(page))
157                         end = buf->end - phys->offset - phys->displacement;
158 
159                 index -= phys->offset + phys->displacement;
160 
161                 if (end - index > BTS_SAFETY_MARGIN)
162                         thresh = end - BTS_SAFETY_MARGIN;
163                 else if (end - index > BTS_RECORD_SIZE)
164                         thresh = end - BTS_RECORD_SIZE;
165                 else
166                         thresh = end;
167         }
168 
169         ds->bts_buffer_base = (u64)(long)page_address(page) + phys->displacement;
170         ds->bts_index = ds->bts_buffer_base + index;
171         ds->bts_absolute_maximum = ds->bts_buffer_base + end;
172         ds->bts_interrupt_threshold = !buf->snapshot
173                 ? ds->bts_buffer_base + thresh
174                 : ds->bts_absolute_maximum + BTS_RECORD_SIZE;
175 }
176 
177 static void bts_buffer_pad_out(struct bts_phys *phys, unsigned long head)
178 {
179         unsigned long index = head - phys->offset;
180 
181         memset(page_address(phys->page) + index, 0, phys->size - index);
182 }
183 
184 static void bts_update(struct bts_ctx *bts)
185 {
186         int cpu = raw_smp_processor_id();
187         struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
188         struct bts_buffer *buf = perf_get_aux(&bts->handle);
189         unsigned long index = ds->bts_index - ds->bts_buffer_base, old, head;
190 
191         if (!buf)
192                 return;
193 
194         head = index + bts_buffer_offset(buf, buf->cur_buf);
195         old = local_xchg(&buf->head, head);
196 
197         if (!buf->snapshot) {
198                 if (old == head)
199                         return;
200 
201                 if (ds->bts_index >= ds->bts_absolute_maximum)
202                         local_inc(&buf->lost);
203 
204                 /*
205                  * old and head are always in the same physical buffer, so we
206                  * can subtract them to get the data size.
207                  */
208                 local_add(head - old, &buf->data_size);
209         } else {
210                 local_set(&buf->data_size, head);
211         }
212 }
213 
214 static int
215 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle);
216 
217 /*
218  * Ordering PMU callbacks wrt themselves and the PMI is done by means
219  * of bts::state, which:
220  *  - is set when bts::handle::event is valid, that is, between
221  *    perf_aux_output_begin() and perf_aux_output_end();
222  *  - is zero otherwise;
223  *  - is ordered against bts::handle::event with a compiler barrier.
224  */
225 
226 static void __bts_event_start(struct perf_event *event)
227 {
228         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
229         struct bts_buffer *buf = perf_get_aux(&bts->handle);
230         u64 config = 0;
231 
232         if (!buf->snapshot)
233                 config |= ARCH_PERFMON_EVENTSEL_INT;
234         if (!event->attr.exclude_kernel)
235                 config |= ARCH_PERFMON_EVENTSEL_OS;
236         if (!event->attr.exclude_user)
237                 config |= ARCH_PERFMON_EVENTSEL_USR;
238 
239         bts_config_buffer(buf);
240 
241         /*
242          * local barrier to make sure that ds configuration made it
243          * before we enable BTS and bts::state goes ACTIVE
244          */
245         wmb();
246 
247         /* INACTIVE/STOPPED -> ACTIVE */
248         WRITE_ONCE(bts->state, BTS_STATE_ACTIVE);
249 
250         intel_pmu_enable_bts(config);
251 
252 }
253 
254 static void bts_event_start(struct perf_event *event, int flags)
255 {
256         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
257         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
258         struct bts_buffer *buf;
259 
260         buf = perf_aux_output_begin(&bts->handle, event);
261         if (!buf)
262                 goto fail_stop;
263 
264         if (bts_buffer_reset(buf, &bts->handle))
265                 goto fail_end_stop;
266 
267         bts->ds_back.bts_buffer_base = cpuc->ds->bts_buffer_base;
268         bts->ds_back.bts_absolute_maximum = cpuc->ds->bts_absolute_maximum;
269         bts->ds_back.bts_interrupt_threshold = cpuc->ds->bts_interrupt_threshold;
270 
271         event->hw.itrace_started = 1;
272         event->hw.state = 0;
273 
274         __bts_event_start(event);
275 
276         return;
277 
278 fail_end_stop:
279         perf_aux_output_end(&bts->handle, 0, false);
280 
281 fail_stop:
282         event->hw.state = PERF_HES_STOPPED;
283 }
284 
285 static void __bts_event_stop(struct perf_event *event, int state)
286 {
287         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
288 
289         /* ACTIVE -> INACTIVE(PMI)/STOPPED(->stop()) */
290         WRITE_ONCE(bts->state, state);
291 
292         /*
293          * No extra synchronization is mandated by the documentation to have
294          * BTS data stores globally visible.
295          */
296         intel_pmu_disable_bts();
297 }
298 
299 static void bts_event_stop(struct perf_event *event, int flags)
300 {
301         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
302         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
303         struct bts_buffer *buf = NULL;
304         int state = READ_ONCE(bts->state);
305 
306         if (state == BTS_STATE_ACTIVE)
307                 __bts_event_stop(event, BTS_STATE_STOPPED);
308 
309         if (state != BTS_STATE_STOPPED)
310                 buf = perf_get_aux(&bts->handle);
311 
312         event->hw.state |= PERF_HES_STOPPED;
313 
314         if (flags & PERF_EF_UPDATE) {
315                 bts_update(bts);
316 
317                 if (buf) {
318                         if (buf->snapshot)
319                                 bts->handle.head =
320                                         local_xchg(&buf->data_size,
321                                                    buf->nr_pages << PAGE_SHIFT);
322 
323                         perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
324                                             !!local_xchg(&buf->lost, 0));
325                 }
326 
327                 cpuc->ds->bts_index = bts->ds_back.bts_buffer_base;
328                 cpuc->ds->bts_buffer_base = bts->ds_back.bts_buffer_base;
329                 cpuc->ds->bts_absolute_maximum = bts->ds_back.bts_absolute_maximum;
330                 cpuc->ds->bts_interrupt_threshold = bts->ds_back.bts_interrupt_threshold;
331         }
332 }
333 
334 void intel_bts_enable_local(void)
335 {
336         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
337         int state = READ_ONCE(bts->state);
338 
339         /*
340          * Here we transition from INACTIVE to ACTIVE;
341          * if we instead are STOPPED from the interrupt handler,
342          * stay that way. Can't be ACTIVE here though.
343          */
344         if (WARN_ON_ONCE(state == BTS_STATE_ACTIVE))
345                 return;
346 
347         if (state == BTS_STATE_STOPPED)
348                 return;
349 
350         if (bts->handle.event)
351                 __bts_event_start(bts->handle.event);
352 }
353 
354 void intel_bts_disable_local(void)
355 {
356         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
357 
358         /*
359          * Here we transition from ACTIVE to INACTIVE;
360          * do nothing for STOPPED or INACTIVE.
361          */
362         if (READ_ONCE(bts->state) != BTS_STATE_ACTIVE)
363                 return;
364 
365         if (bts->handle.event)
366                 __bts_event_stop(bts->handle.event, BTS_STATE_INACTIVE);
367 }
368 
369 static int
370 bts_buffer_reset(struct bts_buffer *buf, struct perf_output_handle *handle)
371 {
372         unsigned long head, space, next_space, pad, gap, skip, wakeup;
373         unsigned int next_buf;
374         struct bts_phys *phys, *next_phys;
375         int ret;
376 
377         if (buf->snapshot)
378                 return 0;
379 
380         head = handle->head & ((buf->nr_pages << PAGE_SHIFT) - 1);
381 
382         phys = &buf->buf[buf->cur_buf];
383         space = phys->offset + phys->displacement + phys->size - head;
384         pad = space;
385         if (space > handle->size) {
386                 space = handle->size;
387                 space -= space % BTS_RECORD_SIZE;
388         }
389         if (space <= BTS_SAFETY_MARGIN) {
390                 /* See if next phys buffer has more space */
391                 next_buf = buf->cur_buf + 1;
392                 if (next_buf >= buf->nr_bufs)
393                         next_buf = 0;
394                 next_phys = &buf->buf[next_buf];
395                 gap = buf_size(phys->page) - phys->displacement - phys->size +
396                       next_phys->displacement;
397                 skip = pad + gap;
398                 if (handle->size >= skip) {
399                         next_space = next_phys->size;
400                         if (next_space + skip > handle->size) {
401                                 next_space = handle->size - skip;
402                                 next_space -= next_space % BTS_RECORD_SIZE;
403                         }
404                         if (next_space > space || !space) {
405                                 if (pad)
406                                         bts_buffer_pad_out(phys, head);
407                                 ret = perf_aux_output_skip(handle, skip);
408                                 if (ret)
409                                         return ret;
410                                 /* Advance to next phys buffer */
411                                 phys = next_phys;
412                                 space = next_space;
413                                 head = phys->offset + phys->displacement;
414                                 /*
415                                  * After this, cur_buf and head won't match ds
416                                  * anymore, so we must not be racing with
417                                  * bts_update().
418                                  */
419                                 buf->cur_buf = next_buf;
420                                 local_set(&buf->head, head);
421                         }
422                 }
423         }
424 
425         /* Don't go far beyond wakeup watermark */
426         wakeup = BTS_SAFETY_MARGIN + BTS_RECORD_SIZE + handle->wakeup -
427                  handle->head;
428         if (space > wakeup) {
429                 space = wakeup;
430                 space -= space % BTS_RECORD_SIZE;
431         }
432 
433         buf->end = head + space;
434 
435         /*
436          * If we have no space, the lost notification would have been sent when
437          * we hit absolute_maximum - see bts_update()
438          */
439         if (!space)
440                 return -ENOSPC;
441 
442         return 0;
443 }
444 
445 int intel_bts_interrupt(void)
446 {
447         struct debug_store *ds = this_cpu_ptr(&cpu_hw_events)->ds;
448         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
449         struct perf_event *event = bts->handle.event;
450         struct bts_buffer *buf;
451         s64 old_head;
452         int err = -ENOSPC, handled = 0;
453 
454         /*
455          * The only surefire way of knowing if this NMI is ours is by checking
456          * the write ptr against the PMI threshold.
457          */
458         if (ds && (ds->bts_index >= ds->bts_interrupt_threshold))
459                 handled = 1;
460 
461         /*
462          * this is wrapped in intel_bts_enable_local/intel_bts_disable_local,
463          * so we can only be INACTIVE or STOPPED
464          */
465         if (READ_ONCE(bts->state) == BTS_STATE_STOPPED)
466                 return handled;
467 
468         buf = perf_get_aux(&bts->handle);
469         if (!buf)
470                 return handled;
471 
472         /*
473          * Skip snapshot counters: they don't use the interrupt, but
474          * there's no other way of telling, because the pointer will
475          * keep moving
476          */
477         if (buf->snapshot)
478                 return 0;
479 
480         old_head = local_read(&buf->head);
481         bts_update(bts);
482 
483         /* no new data */
484         if (old_head == local_read(&buf->head))
485                 return handled;
486 
487         perf_aux_output_end(&bts->handle, local_xchg(&buf->data_size, 0),
488                             !!local_xchg(&buf->lost, 0));
489 
490         buf = perf_aux_output_begin(&bts->handle, event);
491         if (buf)
492                 err = bts_buffer_reset(buf, &bts->handle);
493 
494         if (err) {
495                 WRITE_ONCE(bts->state, BTS_STATE_STOPPED);
496 
497                 if (buf) {
498                         /*
499                          * BTS_STATE_STOPPED should be visible before
500                          * cleared handle::event
501                          */
502                         barrier();
503                         perf_aux_output_end(&bts->handle, 0, false);
504                 }
505         }
506 
507         return 1;
508 }
509 
510 static void bts_event_del(struct perf_event *event, int mode)
511 {
512         bts_event_stop(event, PERF_EF_UPDATE);
513 }
514 
515 static int bts_event_add(struct perf_event *event, int mode)
516 {
517         struct bts_ctx *bts = this_cpu_ptr(&bts_ctx);
518         struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
519         struct hw_perf_event *hwc = &event->hw;
520 
521         event->hw.state = PERF_HES_STOPPED;
522 
523         if (test_bit(INTEL_PMC_IDX_FIXED_BTS, cpuc->active_mask))
524                 return -EBUSY;
525 
526         if (bts->handle.event)
527                 return -EBUSY;
528 
529         if (mode & PERF_EF_START) {
530                 bts_event_start(event, 0);
531                 if (hwc->state & PERF_HES_STOPPED)
532                         return -EINVAL;
533         }
534 
535         return 0;
536 }
537 
538 static void bts_event_destroy(struct perf_event *event)
539 {
540         x86_release_hardware();
541         x86_del_exclusive(x86_lbr_exclusive_bts);
542 }
543 
544 static int bts_event_init(struct perf_event *event)
545 {
546         int ret;
547 
548         if (event->attr.type != bts_pmu.type)
549                 return -ENOENT;
550 
551         if (x86_add_exclusive(x86_lbr_exclusive_bts))
552                 return -EBUSY;
553 
554         /*
555          * BTS leaks kernel addresses even when CPL0 tracing is
556          * disabled, so disallow intel_bts driver for unprivileged
557          * users on paranoid systems since it provides trace data
558          * to the user in a zero-copy fashion.
559          *
560          * Note that the default paranoia setting permits unprivileged
561          * users to profile the kernel.
562          */
563         if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
564             !capable(CAP_SYS_ADMIN))
565                 return -EACCES;
566 
567         ret = x86_reserve_hardware();
568         if (ret) {
569                 x86_del_exclusive(x86_lbr_exclusive_bts);
570                 return ret;
571         }
572 
573         event->destroy = bts_event_destroy;
574 
575         return 0;
576 }
577 
/*
 * pmu::read callback. Intentionally empty: this driver does nothing when
 * asked to read the event.
 */
static void bts_event_read(struct perf_event *event)
{
}
581 
582 static __init int bts_init(void)
583 {
584         if (!boot_cpu_has(X86_FEATURE_DTES64) || !x86_pmu.bts)
585                 return -ENODEV;
586 
587         bts_pmu.capabilities    = PERF_PMU_CAP_AUX_NO_SG | PERF_PMU_CAP_ITRACE |
588                                   PERF_PMU_CAP_EXCLUSIVE;
589         bts_pmu.task_ctx_nr     = perf_sw_context;
590         bts_pmu.event_init      = bts_event_init;
591         bts_pmu.add             = bts_event_add;
592         bts_pmu.del             = bts_event_del;
593         bts_pmu.start           = bts_event_start;
594         bts_pmu.stop            = bts_event_stop;
595         bts_pmu.read            = bts_event_read;
596         bts_pmu.setup_aux       = bts_buffer_setup_aux;
597         bts_pmu.free_aux        = bts_buffer_free_aux;
598 
599         return perf_pmu_register(&bts_pmu, "intel_bts", -1);
600 }
601 arch_initcall(bts_init);
602 

~ [ source navigation ] ~ [ diff markup ] ~ [ identifier search ] ~

kernel.org | git.kernel.org | LWN.net | Project Home | Wiki (Japanese) | Wiki (English) | SVN repository | Mail admin

Linux® is a registered trademark of Linus Torvalds in the United States and other countries.
TOMOYO® is a registered trademark of NTT DATA CORPORATION.

osdn.jp