// SPDX-License-Identifier: GPL-2.0-only
/*
 * This file implements the perfmon-2 subsystem which is used
 * to program the IA-64 Performance Monitoring Unit (PMU).
 *
 * The initial version of perfmon.c was written by
 * Ganesh Venkitachalam, IBM Corp.
 *
 * Then it was modified for perfmon-1.x by Stephane Eranian and
 * David Mosberger, Hewlett Packard Co.
 *
 * Version Perfmon-2.x is a rewrite of perfmon-1.x
 * by Stephane Eranian, Hewlett Packard Co.
 *
 * Copyright (C) 1999-2005  Hewlett Packard Co
 *               Stephane Eranian <eranian@hpl.hp.com>
 *               David Mosberger-Tang <davidm@hpl.hp.com>
 *
 * More information about perfmon available at:
 * 	http://www.hpl.hp.com/research/linux/perfmon
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/interrupt.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/vfs.h>
#include <linux/smp.h>
#include <linux/pagemap.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/rcupdate.h>
#include <linux/completion.h>
#include <linux/tracehook.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/errno.h>
#include <asm/intrinsics.h>
#include <asm/page.h>
#include <asm/perfmon.h>
#include <asm/processor.h>
#include <asm/signal.h>
#include <linux/uaccess.h>
#include <asm/delay.h>

#ifdef CONFIG_PERFMON
/*
 * perfmon context state
 */
#define PFM_CTX_UNLOADED	1	/* context is not loaded onto any task */
#define PFM_CTX_LOADED		2	/* context is loaded onto a task */
#define PFM_CTX_MASKED		3	/* context is loaded but monitoring is masked due to overflow */
#define PFM_CTX_ZOMBIE		4	/* owner of the context is closing it */

#define PFM_INVALID_ACTIVATION	(~0UL)

#define PFM_NUM_PMC_REGS	64	/* PMC save area for ctxsw */
#define PFM_NUM_PMD_REGS	64	/* PMD save area for ctxsw */

/*
 * depth of message queue
 */
#define PFM_MAX_MSGS		32
#define PFM_CTXQ_EMPTY(g)	((g)->ctx_msgq_head == (g)->ctx_msgq_tail)

/*
 * type of a PMU register (bitmask).
 * bitmask structure:
 * 	bit0   : register implemented
 * 	bit1   : end marker
 * 	bit2-3 : reserved
 * 	bit4   : pmc has pmc.pm
 * 	bit5   : pmc controls a counter (has pmc.oi), pmd is used as counter
 * 	bit6-7 : register type
 * 	bit8-31: reserved
 */
#define PFM_REG_NOTIMPL		0x0 /* not implemented at all */
#define PFM_REG_IMPL		0x1 /* register implemented */
#define PFM_REG_END		0x2 /* end marker */
#define PFM_REG_MONITOR		(0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */
#define PFM_REG_COUNTING	(0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */
#define PFM_REG_CONTROL		(0x4<<4|PFM_REG_IMPL) /* PMU control register */
#define PFM_REG_CONFIG		(0x8<<4|PFM_REG_IMPL) /* configuration register */
#define PFM_REG_BUFFER		(0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */
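/*
 * Note on how the type values compose (derived from the defines above):
 * PFM_REG_MONITOR = 0x11 (implemented + pmc.pm) and PFM_REG_COUNTING = 0x31
 * (implemented + pmc.pm + counting). A test such as
 *	(pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING
 * therefore requires the counting, monitor and implemented bits all at once,
 * which is what the PMC_IS_COUNTING()/PMD_IS_COUNTING() macros below rely on.
 */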
#define PMC_IS_LAST(i)	(pmu_conf->pmc_desc[i].type & PFM_REG_END)
#define PMD_IS_LAST(i)	(pmu_conf->pmd_desc[i].type & PFM_REG_END)

#define PMC_OVFL_NOTIFY(ctx, i)	((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY)

/* i assumed unsigned */
#define PMC_IS_IMPL(i)	  (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL))
#define PMD_IS_IMPL(i)	  (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL))

/* XXX: these assume that register i is implemented */
#define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING)
#define PMC_IS_MONITOR(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR)
#define PMC_IS_CONTROL(i)  ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL)

#define PMC_DFL_VAL(i)     pmu_conf->pmc_desc[i].default_value
#define PMC_RSVD_MASK(i)   pmu_conf->pmc_desc[i].reserved_mask
#define PMD_PMD_DEP(i)	   pmu_conf->pmd_desc[i].dep_pmd[0]
#define PMC_PMD_DEP(i)	   pmu_conf->pmc_desc[i].dep_pmd[0]

#define PFM_NUM_IBRS	  IA64_NUM_DBG_REGS
#define PFM_NUM_DBRS	  IA64_NUM_DBG_REGS

#define CTX_OVFL_NOBLOCK(c)	((c)->ctx_fl_block == 0)
#define CTX_HAS_SMPL(c)		((c)->ctx_fl_is_sampling)
#define PFM_CTX_TASK(h)		(h)->ctx_task

#define PMU_PMC_OI		5 /* position of pmc.oi bit */

/* XXX: does not support more than 64 PMDs */
#define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask)
#define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL)

#define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask)

#define CTX_USED_IBR(ctx,n) 	(ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USED_DBR(ctx,n) 	(ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64)
#define CTX_USES_DBREGS(ctx)	(((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1)
#define PFM_CODE_RR	0	/* requesting code range restriction */
#define PFM_DATA_RR	1	/* requesting data range restriction */

#define PFM_CPUINFO_CLEAR(v)	pfm_get_cpu_var(pfm_syst_info) &= ~(v)
#define PFM_CPUINFO_SET(v)	pfm_get_cpu_var(pfm_syst_info) |= (v)
#define PFM_CPUINFO_GET()	pfm_get_cpu_var(pfm_syst_info)

#define RDEP(x)	(1UL<<(x))

/*
 * context protection macros
 * in SMP:
 * 	- we need to protect against CPU concurrency (spin_lock)
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 * in UP:
 * 	- we need to protect against PMU overflow interrupts (local_irq_disable)
 *
 * spin_lock_irqsave()/spin_unlock_irqrestore():
 * 	in SMP: local_irq_disable + spin_lock
 * 	in UP : local_irq_disable
 *
 * spin_lock()/spin_unlock():
 * 	in UP : removed automatically
 * 	in SMP: protect against context accesses from other CPU. interrupts
 * 	        are not masked. This is useful for the PMU interrupt handler
 * 	        because we know we will not get PMU concurrency in that code.
 */
#define PROTECT_CTX(c, f) \
	do {  \
		DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
		DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \
	} while(0)

#define UNPROTECT_CTX(c, f) \
	do { \
		DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)

#define PROTECT_CTX_NOPRINT(c, f) \
	do {  \
		spin_lock_irqsave(&(c)->ctx_lock, f); \
	} while(0)


#define UNPROTECT_CTX_NOPRINT(c, f) \
	do { \
		spin_unlock_irqrestore(&(c)->ctx_lock, f); \
	} while(0)


#define PROTECT_CTX_NOIRQ(c) \
	do {  \
		spin_lock(&(c)->ctx_lock); \
	} while(0)

#define UNPROTECT_CTX_NOIRQ(c) \
	do { \
		spin_unlock(&(c)->ctx_lock); \
	} while(0)
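/*
 * Typical pairing (sketch, mirroring the callers further below): the flags
 * word handed to PROTECT_CTX() must be passed back to UNPROTECT_CTX() so the
 * saved interrupt state is restored, e.g.
 *
 *	unsigned long flags;
 *
 *	PROTECT_CTX(ctx, flags);
 *	... touch context state / live PMU registers ...
 *	UNPROTECT_CTX(ctx, flags);
 *
 * The _NOIRQ variants are reserved for code that already runs with PMU
 * interrupts excluded, e.g. the PMU interrupt handler and the context
 * switch paths (see pfm_protect_ctx_ctxsw() below).
 */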
#ifdef CONFIG_SMP

#define GET_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)
#define INC_ACTIVATION()	pfm_get_cpu_var(pmu_activation_number)++
#define SET_ACTIVATION(c)	(c)->ctx_last_activation = GET_ACTIVATION()

#else /* !CONFIG_SMP */
#define SET_ACTIVATION(t)	do {} while(0)
#define GET_ACTIVATION(t)	do {} while(0)
#define INC_ACTIVATION(t)	do {} while(0)
#endif /* CONFIG_SMP */

#define SET_PMU_OWNER(t, c)	do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0)
#define GET_PMU_OWNER()		pfm_get_cpu_var(pmu_owner)
#define GET_PMU_CTX()		pfm_get_cpu_var(pmu_ctx)

#define LOCK_PFS(g)		spin_lock_irqsave(&pfm_sessions.pfs_lock, g)
#define UNLOCK_PFS(g)		spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g)

#define PFM_REG_RETFLAG_SET(flags, val)	do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0)

/*
 * cmp0 must be the value of pmc0
 */
#define PMC0_HAS_OVFL(cmp0)  (cmp0 & ~0x1UL)

#define PFMFS_MAGIC 0xa0b4d889

/*
 * debugging
 */
#define PFM_DEBUGGING 1
#ifdef PFM_DEBUGGING
#define DPRINT(a) \
	do { \
		if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
	} while (0)

#define DPRINT_ovfl(a) \
	do { \
		if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \
	} while (0)
#endif
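/*
 * DPRINT()/DPRINT_ovfl() are always compiled in but stay silent unless the
 * corresponding sysctl knobs are set. With the ctl tables registered further
 * below this means (sketch):
 *
 *	echo 1 > /proc/sys/kernel/perfmon/debug		# enable DPRINT()
 *	echo 1 > /proc/sys/kernel/perfmon/debug_ovfl	# also enable DPRINT_ovfl()
 */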
/*
 * 64-bit software counter structure
 *
 * the next_reset_type is applied to the next call to pfm_reset_regs()
 */
typedef struct {
	unsigned long	val;		/* virtual 64bit counter value */
	unsigned long	lval;		/* last reset value */
	unsigned long	long_reset;	/* reset value on sampling overflow */
	unsigned long	short_reset;	/* reset value on overflow */
	unsigned long	reset_pmds[4];	/* which other pmds to reset when this counter overflows */
	unsigned long	smpl_pmds[4];	/* which pmds are accessed when counter overflows */
	unsigned long	seed;		/* seed for random-number generator */
	unsigned long	mask;		/* mask for random-number generator */
	unsigned int	flags;		/* notify/do not notify */
	unsigned long	eventid;	/* overflow event identifier */
} pfm_counter_t;

/*
 * context flags
 */
typedef struct {
	unsigned int block:1;		/* when 1, task will be blocked on user notifications */
	unsigned int system:1;		/* do system wide monitoring */
	unsigned int using_dbreg:1;	/* using range restrictions (debug registers) */
	unsigned int is_sampling:1;	/* true if using a custom format */
	unsigned int excl_idle:1;	/* exclude idle task in system wide session */
	unsigned int going_zombie:1;	/* context is zombie (MASKED+blocking) */
	unsigned int trap_reason:2;	/* reason for going into pfm_handle_work() */
	unsigned int no_msg:1;		/* no message sent on overflow */
	unsigned int can_restart:1;	/* allowed to issue a PFM_RESTART */
	unsigned int reserved:22;
} pfm_context_flags_t;

#define PFM_TRAP_REASON_NONE		0x0	/* default value */
#define PFM_TRAP_REASON_BLOCK		0x1	/* we need to block on overflow */
#define PFM_TRAP_REASON_RESET		0x2	/* we need to reset PMDs */

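/*
 * Summary of the context states defined at the top of this file, as they are
 * used below: a freshly allocated context starts UNLOADED
 * (pfm_context_alloc()); LOADED means it is attached to a task (or a CPU for
 * a system-wide session); MASKED means it is still loaded but monitoring was
 * masked after a notified overflow (see pfm_mask_monitoring() and
 * pfm_restore_monitoring()); ZOMBIE is set by pfm_close() when the caller is
 * not the monitored task, so the final cleanup is deferred until that task
 * notices the state. pfm_context_unload() returns a context to UNLOADED and
 * releases its session.
 */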
/*
 * perfmon context: encapsulates all the state of a monitoring session
 */

typedef struct pfm_context {
	spinlock_t		ctx_lock;		/* context protection */

	pfm_context_flags_t	ctx_flags;		/* bitmask of flags (block reason incl.) */
	unsigned int		ctx_state;		/* state: active/inactive (no bitfield) */

	struct task_struct 	*ctx_task;		/* task to which context is attached */

	unsigned long		ctx_ovfl_regs[4];	/* which registers overflowed (notification) */

	struct completion	ctx_restart_done;	/* use for blocking notification mode */

	unsigned long		ctx_used_pmds[4];	/* bitmask of PMD used */
	unsigned long		ctx_all_pmds[4];	/* bitmask of all accessible PMDs */
	unsigned long		ctx_reload_pmds[4];	/* bitmask of force reload PMD on ctxsw in */

	unsigned long		ctx_all_pmcs[4];	/* bitmask of all accessible PMCs */
	unsigned long		ctx_reload_pmcs[4];	/* bitmask of force reload PMC on ctxsw in */
	unsigned long		ctx_used_monitors[4];	/* bitmask of monitor PMC being used */

	unsigned long		ctx_pmcs[PFM_NUM_PMC_REGS];	/* saved copies of PMC values */

	unsigned int		ctx_used_ibrs[1];		/* bitmask of used IBR (speedup ctxsw in) */
	unsigned int		ctx_used_dbrs[1];		/* bitmask of used DBR (speedup ctxsw in) */
	unsigned long		ctx_dbrs[IA64_NUM_DBG_REGS];	/* DBR values (cache) when not loaded */
	unsigned long		ctx_ibrs[IA64_NUM_DBG_REGS];	/* IBR values (cache) when not loaded */

	pfm_counter_t		ctx_pmds[PFM_NUM_PMD_REGS];	/* software state for PMDS */

	unsigned long		th_pmcs[PFM_NUM_PMC_REGS];	/* PMC thread save state */
	unsigned long		th_pmds[PFM_NUM_PMD_REGS];	/* PMD thread save state */

	unsigned long		ctx_saved_psr_up;	/* only contains psr.up value */

	unsigned long		ctx_last_activation;	/* context last activation number for last_cpu */
	unsigned int		ctx_last_cpu;		/* CPU id of current or last CPU used (SMP only) */
	unsigned int		ctx_cpu;		/* cpu to which perfmon is applied (system wide) */

	int			ctx_fd;			/* file descriptor used by this context */
	pfm_ovfl_arg_t		ctx_ovfl_arg;		/* argument to custom buffer format handler */

	pfm_buffer_fmt_t	*ctx_buf_fmt;		/* buffer format callbacks */
	void			*ctx_smpl_hdr;		/* points to sampling buffer header kernel vaddr */
	unsigned long		ctx_smpl_size;		/* size of sampling buffer */
	void			*ctx_smpl_vaddr;	/* user level virtual address of smpl buffer */

	wait_queue_head_t	ctx_msgq_wait;
	pfm_msg_t		ctx_msgq[PFM_MAX_MSGS];
	int			ctx_msgq_head;
	int			ctx_msgq_tail;
	struct fasync_struct	*ctx_async_queue;

	wait_queue_head_t	ctx_zombieq;		/* termination cleanup wait queue */
} pfm_context_t;

/*
 * magic number used to verify that structure is really
 * a perfmon context
 */
#define PFM_IS_FILE(f)		((f)->f_op == &pfm_file_ops)

#define PFM_GET_CTX(t)		((pfm_context_t *)(t)->thread.pfm_context)

#ifdef CONFIG_SMP
#define SET_LAST_CPU(ctx, v)	(ctx)->ctx_last_cpu = (v)
#define GET_LAST_CPU(ctx)	(ctx)->ctx_last_cpu
#else
#define SET_LAST_CPU(ctx, v)	do {} while(0)
#define GET_LAST_CPU(ctx)	do {} while(0)
#endif


#define ctx_fl_block		ctx_flags.block
#define ctx_fl_system		ctx_flags.system
#define ctx_fl_using_dbreg	ctx_flags.using_dbreg
#define ctx_fl_is_sampling	ctx_flags.is_sampling
#define ctx_fl_excl_idle	ctx_flags.excl_idle
#define ctx_fl_going_zombie	ctx_flags.going_zombie
#define ctx_fl_trap_reason	ctx_flags.trap_reason
#define ctx_fl_no_msg		ctx_flags.no_msg
#define ctx_fl_can_restart	ctx_flags.can_restart

#define PFM_SET_WORK_PENDING(t, v)	do { (t)->thread.pfm_needs_checking = v; } while(0);
#define PFM_GET_WORK_PENDING(t)		(t)->thread.pfm_needs_checking

/*
 * global information about all sessions
 * mostly used to synchronize between system wide and per-process
 */
typedef struct {
	spinlock_t		pfs_lock;		   /* lock the structure */

	unsigned int		pfs_task_sessions;	   /* number of per task sessions */
	unsigned int		pfs_sys_sessions;	   /* number of per system wide sessions */
	unsigned int		pfs_sys_use_dbregs;	   /* incremented when a system wide session uses debug regs */
	unsigned int		pfs_ptrace_use_dbregs;	   /* incremented when a process uses debug regs */
	struct task_struct	*pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */
} pfm_session_t;

/*
 * information about a PMC or PMD.
 * dep_pmd[]: a bitmask of dependent PMD registers
 * dep_pmc[]: a bitmask of dependent PMC registers
 */
typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs);
typedef struct {
	unsigned int		type;
	int			pm_pos;
	unsigned long		default_value;	/* power-on default value */
	unsigned long		reserved_mask;	/* bitmask of reserved bits */
	pfm_reg_check_t		read_check;
	pfm_reg_check_t		write_check;
	unsigned long		dep_pmd[4];
	unsigned long		dep_pmc[4];
} pfm_reg_desc_t;

/* assume cnum is a valid monitor */
#define PMC_PM(cnum, val)	(((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1)

/*
 * This structure is initialized at boot time and contains
 * a description of the PMU main characteristics.
 *
 * If the probe function is defined, detection is based
 * on its return value:
 * 	- 0 means recognized PMU
 * 	- anything else means not supported
 * When the probe function is not defined, then the pmu_family field
 * is used and it must match the host CPU family such that:
 * 	- cpu->family & config->pmu_family != 0
 */
typedef struct {
	unsigned long  ovfl_val;	/* overflow value for counters */

	pfm_reg_desc_t *pmc_desc;	/* detailed PMC register dependencies descriptions */
	pfm_reg_desc_t *pmd_desc;	/* detailed PMD register dependencies descriptions */

	unsigned int   num_pmcs;	/* number of PMCS: computed at init time */
	unsigned int   num_pmds;	/* number of PMDS: computed at init time */
	unsigned long  impl_pmcs[4];	/* bitmask of implemented PMCS */
	unsigned long  impl_pmds[4];	/* bitmask of implemented PMDS */

	char	      *pmu_name;	/* PMU family name */
	unsigned int  pmu_family;	/* cpuid family pattern used to identify pmu */
	unsigned int  flags;		/* pmu specific flags */
	unsigned int  num_ibrs;		/* number of IBRS: computed at init time */
	unsigned int  num_dbrs;		/* number of DBRS: computed at init time */
	unsigned int  num_counters;	/* PMC/PMD counting pairs : computed at init time */
	int           (*probe)(void);	/* customized probe routine */
	unsigned int  use_rr_dbregs:1;	/* set if debug registers used for range restriction */
} pmu_config_t;
/*
 * PMU specific flags
 */
#define PFM_PMU_IRQ_RESEND	1	/* PMU needs explicit IRQ resend */

/*
 * debug register related type definitions
 */
typedef struct {
	unsigned long ibr_mask:56;
	unsigned long ibr_plm:4;
	unsigned long ibr_ig:3;
	unsigned long ibr_x:1;
} ibr_mask_reg_t;

typedef struct {
	unsigned long dbr_mask:56;
	unsigned long dbr_plm:4;
	unsigned long dbr_ig:2;
	unsigned long dbr_w:1;
	unsigned long dbr_r:1;
} dbr_mask_reg_t;

typedef union {
	unsigned long  val;
	ibr_mask_reg_t ibr;
	dbr_mask_reg_t dbr;
} dbreg_t;


/*
 * perfmon command descriptions
 */
typedef struct {
	int		(*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);
	char		*cmd_name;
	int		cmd_flags;
	unsigned int	cmd_narg;
	size_t		cmd_argsize;
	int		(*cmd_getsize)(void *arg, size_t *sz);
} pfm_cmd_desc_t;

#define PFM_CMD_FD		0x01	/* command requires a file descriptor */
#define PFM_CMD_ARG_READ	0x02	/* command must read argument(s) */
#define PFM_CMD_ARG_RW		0x04	/* command must read/write argument(s) */
#define PFM_CMD_STOP		0x08	/* command does not work on zombie context */


#define PFM_CMD_NAME(cmd)	pfm_cmd_tab[(cmd)].cmd_name
#define PFM_CMD_READ_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ)
#define PFM_CMD_RW_ARG(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW)
#define PFM_CMD_USE_FD(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD)
#define PFM_CMD_STOPPED(cmd)	(pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP)

#define PFM_CMD_ARG_MANY	-1 /* cannot be zero */

typedef struct {
	unsigned long pfm_spurious_ovfl_intr_count;	/* keep track of spurious ovfl interrupts */
	unsigned long pfm_replay_ovfl_intr_count;	/* keep track of replayed ovfl interrupts */
	unsigned long pfm_ovfl_intr_count;		/* keep track of ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles;		/* cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_min;		/* min cycles spent processing ovfl interrupts */
	unsigned long pfm_ovfl_intr_cycles_max;		/* max cycles spent processing ovfl interrupts */
	unsigned long pfm_smpl_handler_calls;
	unsigned long pfm_smpl_handler_cycles;
	char pad[SMP_CACHE_BYTES] ____cacheline_aligned;
} pfm_stats_t;

/*
 * perfmon internal variables
 */
static pfm_stats_t		pfm_stats[NR_CPUS];
static pfm_session_t		pfm_sessions;	/* global sessions information */

static DEFINE_SPINLOCK(pfm_alt_install_check);
static pfm_intr_handler_desc_t  *pfm_alt_intr_handler;

static struct proc_dir_entry 	*perfmon_dir;
static pfm_uuid_t		pfm_null_uuid = {0,};

static spinlock_t		pfm_buffer_fmt_lock;
static LIST_HEAD(pfm_buffer_fmt_list);

static pmu_config_t		*pmu_conf;

/* sysctl() controls */
pfm_sysctl_t pfm_sysctl;
EXPORT_SYMBOL(pfm_sysctl);

static struct ctl_table pfm_ctl_table[] = {
	{
		.procname	= "debug",
		.data		= &pfm_sysctl.debug,
		.maxlen		= sizeof(int),
		.mode		= 0666,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "debug_ovfl",
		.data		= &pfm_sysctl.debug_ovfl,
		.maxlen		= sizeof(int),
		.mode		= 0666,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "fastctxsw",
		.data		= &pfm_sysctl.fastctxsw,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "expert_mode",
		.data		= &pfm_sysctl.expert_mode,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec,
	},
	{}
};
static struct ctl_table pfm_sysctl_dir[] = {
	{
		.procname	= "perfmon",
		.mode		= 0555,
		.child		= pfm_ctl_table,
	},
	{}
};
static struct ctl_table pfm_sysctl_root[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= pfm_sysctl_dir,
	},
	{}
};
static struct ctl_table_header *pfm_sysctl_header;

static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);

#define pfm_get_cpu_var(v)		__ia64_per_cpu_var(v)
#define pfm_get_cpu_data(a,b)		per_cpu(a, b)

static inline void
pfm_put_task(struct task_struct *task)
{
	if (task != current) put_task_struct(task);
}

static inline unsigned long
pfm_protect_ctx_ctxsw(pfm_context_t *x)
{
	spin_lock(&(x)->ctx_lock);
	return 0UL;
}

static inline void
pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f)
{
	spin_unlock(&(x)->ctx_lock);
}

/* forward declaration */
static const struct dentry_operations pfmfs_dentry_operations;

static int pfmfs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, PFMFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->dops = &pfmfs_dentry_operations;
	return 0;
}

static struct file_system_type pfm_fs_type = {
	.name			= "pfmfs",
	.init_fs_context	= pfmfs_init_fs_context,
	.kill_sb		= kill_anon_super,
};
MODULE_ALIAS_FS("pfmfs");

DEFINE_PER_CPU(unsigned long, pfm_syst_info);
DEFINE_PER_CPU(struct task_struct *, pmu_owner);
DEFINE_PER_CPU(pfm_context_t *, pmu_ctx);
DEFINE_PER_CPU(unsigned long, pmu_activation_number);
EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info);


/* forward declaration */
static const struct file_operations pfm_file_ops;

/*
 * forward declarations
 */
#ifndef CONFIG_SMP
static void pfm_lazy_save_regs (struct task_struct *ta);
#endif

void dump_pmu_state(const char *);
static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs);

#include "perfmon_itanium.h"
#include "perfmon_mckinley.h"
#include "perfmon_montecito.h"
#include "perfmon_generic.h"

static pmu_config_t *pmu_confs[]={
	&pmu_conf_mont,
	&pmu_conf_mck,
	&pmu_conf_ita,
	&pmu_conf_gen,	/* must be last */
	NULL
};


static int pfm_end_notify_user(pfm_context_t *ctx);

static inline void
pfm_clear_psr_pp(void)
{
	ia64_rsm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_pp(void)
{
	ia64_ssm(IA64_PSR_PP);
	ia64_srlz_i();
}

static inline void
pfm_clear_psr_up(void)
{
	ia64_rsm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline void
pfm_set_psr_up(void)
{
	ia64_ssm(IA64_PSR_UP);
	ia64_srlz_i();
}

static inline unsigned long
pfm_get_psr(void)
{
	unsigned long tmp;
	tmp = ia64_getreg(_IA64_REG_PSR);
	ia64_srlz_i();
	return tmp;
}

static inline void
pfm_set_psr_l(unsigned long val)
{
	ia64_setreg(_IA64_REG_PSR_L, val);
	ia64_srlz_i();
}

static inline void
pfm_freeze_pmu(void)
{
	ia64_set_pmc(0,1UL);
	ia64_srlz_d();
}

static inline void
pfm_unfreeze_pmu(void)
{
	ia64_set_pmc(0,0UL);
	ia64_srlz_d();
}

static inline void
pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs)
{
	int i;

	for (i=0; i < nibrs; i++) {
		ia64_set_ibr(i, ibrs[i]);
		ia64_dv_serialize_instruction();
	}
	ia64_srlz_i();
}

static inline void
pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs)
{
	int i;

	for (i=0; i < ndbrs; i++) {
		ia64_set_dbr(i, dbrs[i]);
		ia64_dv_serialize_data();
	}
	ia64_srlz_d();
}

/*
 * PMD[i] must be a counter. no check is made
 */
static inline unsigned long
pfm_read_soft_counter(pfm_context_t *ctx, int i)
{
	return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val);
}

/*
 * PMD[i] must be a counter. no check is made
 */
static inline void
pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val)
{
	unsigned long ovfl_val = pmu_conf->ovfl_val;

	ctx->ctx_pmds[i].val = val & ~ovfl_val;
	/*
	 * writing to unimplemented part is ignored, so we do not need to
	 * mask off top part
	 */
	ia64_set_pmd(i, val & ovfl_val);
}

static pfm_msg_t *
pfm_get_new_msg(pfm_context_t *ctx)
{
	int idx, next;

	next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS;

	DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));
	if (next == ctx->ctx_msgq_head) return NULL;

	idx = ctx->ctx_msgq_tail;
	ctx->ctx_msgq_tail = next;

	DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx));

	return ctx->ctx_msgq+idx;
}

static pfm_msg_t *
pfm_get_next_msg(pfm_context_t *ctx)
{
	pfm_msg_t *msg;

	DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

	if (PFM_CTXQ_EMPTY(ctx)) return NULL;

	/*
	 * get oldest message
	 */
	msg = ctx->ctx_msgq+ctx->ctx_msgq_head;

	/*
	 * and move forward
	 */
	ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS;

	DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type));

	return msg;
}

static void
pfm_reset_msgq(pfm_context_t *ctx)
{
	ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
	DPRINT(("ctx=%p msgq reset\n", ctx));
}
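/*
 * The notification queue above is a fixed-size ring of PFM_MAX_MSGS entries:
 * pfm_get_new_msg() claims the slot at ctx_msgq_tail and returns NULL when
 * advancing the tail would meet the head (queue full, so one slot always
 * stays unused), while pfm_get_next_msg() consumes from ctx_msgq_head.
 * Readers sleep on ctx_msgq_wait until the queue becomes non-empty (see
 * pfm_read() below).
 */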
static pfm_context_t *
pfm_context_alloc(int ctx_flags)
{
	pfm_context_t *ctx;

	/*
	 * allocate context descriptor
	 * must be able to free with interrupts disabled
	 */
	ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL);
	if (ctx) {
		DPRINT(("alloc ctx @%p\n", ctx));

		/*
		 * init context protection lock
		 */
		spin_lock_init(&ctx->ctx_lock);

		/*
		 * context is unloaded
		 */
		ctx->ctx_state = PFM_CTX_UNLOADED;

		/*
		 * initialization of context's flags
		 */
		ctx->ctx_fl_block       = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0;
		ctx->ctx_fl_system      = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0;
		ctx->ctx_fl_no_msg      = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0;
		/*
		 * will move to set properties
		 * ctx->ctx_fl_excl_idle   = (ctx_flags & PFM_FL_EXCL_IDLE) ? 1: 0;
		 */

		/*
		 * init restart semaphore to locked
		 */
		init_completion(&ctx->ctx_restart_done);

		/*
		 * activation is used in SMP only
		 */
		ctx->ctx_last_activation = PFM_INVALID_ACTIVATION;
		SET_LAST_CPU(ctx, -1);

		/*
		 * initialize notification message queue
		 */
		ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0;
		init_waitqueue_head(&ctx->ctx_msgq_wait);
		init_waitqueue_head(&ctx->ctx_zombieq);

	}
	return ctx;
}

static void
pfm_context_free(pfm_context_t *ctx)
{
	if (ctx) {
		DPRINT(("free ctx @%p\n", ctx));
		kfree(ctx);
	}
}

static void
pfm_mask_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	unsigned long mask, val, ovfl_mask;
	int i;

	DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task)));

	ovfl_mask = pmu_conf->ovfl_val;
	/*
	 * monitoring can only be masked as a result of a valid
	 * counter overflow. In UP, it means that the PMU still
	 * has an owner. Note that the owner can be different
	 * from the current task. However the PMU state belongs
	 * to the owner.
	 * In SMP, a valid overflow only happens when task is
	 * current. Therefore if we come here, we know that
	 * the PMU state belongs to the current task, therefore
	 * we can access the live registers.
	 *
	 * So in both cases, the live register contains the owner's
	 * state. We can ONLY touch the PMU registers and NOT the PSR.
	 *
	 * As a consequence of this call, the ctx->th_pmds[] array
	 * contains stale information which must be ignored
	 * when context is reloaded AND monitoring is active (see
	 * pfm_restart).
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;
		val = ia64_get_pmd(i);

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we rebuild the full 64 bit value of the counter
			 */
			ctx->ctx_pmds[i].val += (val & ovfl_mask);
		} else {
			ctx->ctx_pmds[i].val = val;
		}
		DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val & ovfl_mask));
	}
	/*
	 * mask monitoring by setting the privilege level to 0
	 * we cannot use psr.pp/psr.up for this, it is controlled by
	 * the user
	 *
	 * if task is current, modify actual registers, otherwise modify
	 * thread save state, i.e., what will be restored in pfm_load_regs()
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL);
		ctx->th_pmcs[i] &= ~0xfUL;
		DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
	}
	/*
	 * make all of this visible
	 */
	ia64_srlz_d();
}

/*
 * must always be done with task == current
 *
 * context must be in MASKED state when calling
 */
static void
pfm_restore_monitoring(struct task_struct *task)
{
	pfm_context_t *ctx = PFM_GET_CTX(task);
	unsigned long mask, ovfl_mask;
	unsigned long psr, val;
	int i, is_system;

	is_system = ctx->ctx_fl_system;
	ovfl_mask = pmu_conf->ovfl_val;

	if (task != current) {
		printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current));
		return;
	}
	if (ctx->ctx_state != PFM_CTX_MASKED) {
		printk(KERN_ERR "perfmon.%d: task[%d] current[%d] invalid state=%d\n", __LINE__,
			task_pid_nr(task), task_pid_nr(current), ctx->ctx_state);
		return;
	}
	psr = pfm_get_psr();
	/*
	 * monitoring is masked via the PMC.
	 * As we restore their value, we do not want each counter to
	 * restart right away. We stop monitoring using the PSR,
	 * restore the PMC (and PMD) and then re-establish the psr
	 * as it was. Note that there can be no pending overflow at
	 * this point, because monitoring was MASKED.
	 *
	 * system-wide sessions are pinned and self-monitoring
	 */
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
		/* disable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP);
		pfm_clear_psr_pp();
	} else {
		pfm_clear_psr_up();
	}
	/*
	 * first, we restore the PMD
	 */
	mask = ctx->ctx_used_pmds[0];
	for (i = 0; mask; i++, mask>>=1) {
		/* skip non used pmds */
		if ((mask & 0x1) == 0) continue;

		if (PMD_IS_COUNTING(i)) {
			/*
			 * we split the 64bit value according to
			 * counter width
			 */
			val = ctx->ctx_pmds[i].val & ovfl_mask;
			ctx->ctx_pmds[i].val &= ~ovfl_mask;
		} else {
			val = ctx->ctx_pmds[i].val;
		}
		ia64_set_pmd(i, val);

		DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n",
			i,
			ctx->ctx_pmds[i].val,
			val));
	}
	/*
	 * restore the PMCs
	 */
	mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER;
	for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0UL) continue;
		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
		ia64_set_pmc(i, ctx->th_pmcs[i]);
		DPRINT(("[%d] pmc[%d]=0x%lx\n",
					task_pid_nr(task), i, ctx->th_pmcs[i]));
	}
	ia64_srlz_d();

	/*
	 * must restore DBR/IBR because could be modified while masked
	 * XXX: need to optimize
	 */
	if (ctx->ctx_fl_using_dbreg) {
		pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs);
		pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs);
	}

	/*
	 * now restore PSR
	 */
	if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) {
		/* enable dcr pp */
		ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP);
		ia64_srlz_i();
	}
	pfm_set_psr_l(psr);
}

static inline void
pfm_save_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;

	ia64_srlz_d();

	for (i=0; mask; i++, mask>>=1) {
		if (mask & 0x1) pmds[i] = ia64_get_pmd(i);
	}
}

/*
 * reload from thread state (used for ctxsw only)
 */
static inline void
pfm_restore_pmds(unsigned long *pmds, unsigned long mask)
{
	int i;
	unsigned long val, ovfl_val = pmu_conf->ovfl_val;

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		val = PMD_IS_COUNTING(i) ? pmds[i] & ovfl_val : pmds[i];
		ia64_set_pmd(i, val);
	}
	ia64_srlz_d();
}

/*
 * propagate PMD from context to thread-state
 */
static inline void
pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx)
{
	unsigned long ovfl_val = pmu_conf->ovfl_val;
	unsigned long mask = ctx->ctx_all_pmds[0];
	unsigned long val;
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {

		val = ctx->ctx_pmds[i].val;

		/*
		 * We break up the 64 bit value into 2 pieces
		 * the lower bits go to the machine state in the
		 * thread (will be reloaded on ctxsw in).
		 * The upper part stays in the soft-counter.
		 */
		if (PMD_IS_COUNTING(i)) {
			ctx->ctx_pmds[i].val = val & ~ovfl_val;
			val &= ovfl_val;
		}
		ctx->th_pmds[i] = val;

		DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n",
			i,
			ctx->th_pmds[i],
			ctx->ctx_pmds[i].val));
	}
}

/*
 * propagate PMC from context to thread-state
 */
static inline void
pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx)
{
	unsigned long mask = ctx->ctx_all_pmcs[0];
	int i;

	DPRINT(("mask=0x%lx\n", mask));

	for (i=0; mask; i++, mask>>=1) {
		/* masking 0 with ovfl_val yields 0 */
		ctx->th_pmcs[i] = ctx->ctx_pmcs[i];
		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i]));
	}
}



static inline void
pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask)
{
	int i;

	for (i=0; mask; i++, mask>>=1) {
		if ((mask & 0x1) == 0) continue;
		ia64_set_pmc(i, pmcs[i]);
	}
	ia64_srlz_d();
}

static inline int
pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b)
{
	return memcmp(a, b, sizeof(pfm_uuid_t));
}

static inline int
pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size)
{
	int ret = 0;
	if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size);
	return ret;
}


static inline int
pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags,
		     int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags,
		 int cpu, void *arg)
{
	int ret = 0;
	if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg);
	return ret;
}

static inline int
pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs);
	return ret;
}

static inline int
pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs)
{
	int ret = 0;
	if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs);
	return ret;
}

static pfm_buffer_fmt_t *
__pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	struct list_head * pos;
	pfm_buffer_fmt_t * entry;

	list_for_each(pos, &pfm_buffer_fmt_list) {
		entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list);
		if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0)
			return entry;
	}
	return NULL;
}

/*
 * find a buffer format based on its uuid
 */
static pfm_buffer_fmt_t *
pfm_find_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t * fmt;
	spin_lock(&pfm_buffer_fmt_lock);
	fmt = __pfm_find_buffer_fmt(uuid);
	spin_unlock(&pfm_buffer_fmt_lock);
	return fmt;
}

int
pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt)
{
	int ret = 0;

	/* some sanity checks */
	if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL;

	/* we need at least a handler */
	if (fmt->fmt_handler == NULL) return -EINVAL;

	/*
	 * XXX: need check validity of fmt_arg_size
	 */

	spin_lock(&pfm_buffer_fmt_lock);

	if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) {
		printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name);
		ret = -EBUSY;
		goto out;
	}
	list_add(&fmt->fmt_list, &pfm_buffer_fmt_list);
	printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name);

out:
	spin_unlock(&pfm_buffer_fmt_lock);
	return ret;
}
EXPORT_SYMBOL(pfm_register_buffer_fmt);

int
pfm_unregister_buffer_fmt(pfm_uuid_t uuid)
{
	pfm_buffer_fmt_t *fmt;
	int ret = 0;

	spin_lock(&pfm_buffer_fmt_lock);

	fmt = __pfm_find_buffer_fmt(uuid);
	if (!fmt) {
		printk(KERN_ERR "perfmon: cannot unregister format, not found\n");
		ret = -EINVAL;
		goto out;
	}
	list_del_init(&fmt->fmt_list);
	printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name);

out:
	spin_unlock(&pfm_buffer_fmt_lock);
	return ret;

}
EXPORT_SYMBOL(pfm_unregister_buffer_fmt);
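/*
 * A sampling-format module would typically register itself along these
 * lines (sketch only; the field names are the ones referenced in this file,
 * everything else is illustrative):
 *
 *	static pfm_buffer_fmt_t my_fmt = {
 *		.fmt_name    = "example-format",
 *		.fmt_uuid    = MY_FMT_UUID,
 *		.fmt_handler = my_overflow_handler,	// mandatory
 *		.fmt_getsize = my_getsize,		// optional, like the
 *		.fmt_init    = my_init,			// other callbacks
 *	};
 *
 *	pfm_register_buffer_fmt(&my_fmt);	// -EBUSY on duplicate uuid
 *	...
 *	pfm_unregister_buffer_fmt(my_fmt.fmt_uuid);
 */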
static int
pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu)
{
	unsigned long flags;
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS(flags);

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	if (is_syswide) {
		/*
		 * cannot mix system wide and per-task sessions
		 */
		if (pfm_sessions.pfs_task_sessions > 0UL) {
			DPRINT(("system wide not possible, %u conflicting task_sessions\n",
				pfm_sessions.pfs_task_sessions));
			goto abort;
		}

		if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict;

		DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id()));

		pfm_sessions.pfs_sys_session[cpu] = task;

		pfm_sessions.pfs_sys_sessions++ ;

	} else {
		if (pfm_sessions.pfs_sys_sessions) goto abort;
		pfm_sessions.pfs_task_sessions++;
	}

	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	/*
	 * Force idle() into poll mode
	 */
	cpu_idle_poll_ctrl(true);

	UNLOCK_PFS(flags);

	return 0;

error_conflict:
	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
		cpu));
abort:
	UNLOCK_PFS(flags);

	return -EBUSY;

}

static int
pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
{
	unsigned long flags;
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS(flags);

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));


	if (is_syswide) {
		pfm_sessions.pfs_sys_session[cpu] = NULL;
		/*
		 * would not work with perfmon+more than one bit in cpu_mask
		 */
		if (ctx && ctx->ctx_fl_using_dbreg) {
			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
			} else {
				pfm_sessions.pfs_sys_use_dbregs--;
			}
		}
		pfm_sessions.pfs_sys_sessions--;
	} else {
		pfm_sessions.pfs_task_sessions--;
	}
	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	/* Undo forced polling. Last session reenables pal_halt */
	cpu_idle_poll_ctrl(false);

	UNLOCK_PFS(flags);

	return 0;
}

/*
 * removes virtual mapping of the sampling buffer.
 * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
 * a PROTECT_CTX() section.
 */
static int
pfm_remove_smpl_mapping(void *vaddr, unsigned long size)
{
	struct task_struct *task = current;
	int r;

	/* sanity checks */
	if (task->mm == NULL || size == 0UL || vaddr == NULL) {
		printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm);
		return -EINVAL;
	}

	DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size));

	/*
	 * does the actual unmapping
	 */
	r = vm_munmap((unsigned long)vaddr, size);

	if (r != 0) {
		printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size);
	}

	DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r));

	return 0;
}

/*
 * free actual physical storage used by sampling buffer
 */
#if 0
static int
pfm_free_smpl_buffer(pfm_context_t *ctx)
{
	pfm_buffer_fmt_t *fmt;

	if (ctx->ctx_smpl_hdr == NULL) goto invalid_free;

	/*
	 * we won't use the buffer format anymore
	 */
	fmt = ctx->ctx_buf_fmt;

	DPRINT(("sampling buffer @%p size %lu vaddr=%p\n",
		ctx->ctx_smpl_hdr,
		ctx->ctx_smpl_size,
		ctx->ctx_smpl_vaddr));

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

	/*
	 * free the buffer
	 */
	vfree(ctx->ctx_smpl_hdr);

	ctx->ctx_smpl_hdr  = NULL;
	ctx->ctx_smpl_size = 0UL;

	return 0;

invalid_free:
	printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current));
	return -EINVAL;
}
#endif

static inline void
pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt)
{
	if (fmt == NULL) return;

	pfm_buf_fmt_exit(fmt, current, NULL, NULL);

}

/*
 * pfmfs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pfm: will go nicely and kill the special-casing in procfs.
 */
static struct vfsmount *pfmfs_mnt __read_mostly;

static int __init
init_pfm_fs(void)
{
	int err = register_filesystem(&pfm_fs_type);
	if (!err) {
		pfmfs_mnt = kern_mount(&pfm_fs_type);
		err = PTR_ERR(pfmfs_mnt);
		if (IS_ERR(pfmfs_mnt))
			unregister_filesystem(&pfm_fs_type);
		else
			err = 0;
	}
	return err;
}

static ssize_t
pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
	pfm_context_t *ctx;
	pfm_msg_t *msg;
	ssize_t ret;
	unsigned long flags;
	DECLARE_WAITQUEUE(wait, current);
	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_read: bad magic [%d]\n", task_pid_nr(current));
		return -EINVAL;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current));
		return -EINVAL;
	}

	/*
	 * check even when there is no message
	 */
	if (size < sizeof(pfm_msg_t)) {
		DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t)));
		return -EINVAL;
	}

	PROTECT_CTX(ctx, flags);

	/*
	 * put ourselves on the wait queue
	 */
	add_wait_queue(&ctx->ctx_msgq_wait, &wait);


	for(;;) {
		/*
		 * check wait queue
		 */

		set_current_state(TASK_INTERRUPTIBLE);

		DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail));

		ret = 0;
		if(PFM_CTXQ_EMPTY(ctx) == 0) break;

		UNPROTECT_CTX(ctx, flags);

		/*
		 * check non-blocking read
		 */
		ret = -EAGAIN;
		if(filp->f_flags & O_NONBLOCK) break;

		/*
		 * check pending signals
		 */
		if(signal_pending(current)) {
			ret = -EINTR;
			break;
		}
		/*
		 * no message, so wait
		 */
		schedule();

		PROTECT_CTX(ctx, flags);
	}
	DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret));
	set_current_state(TASK_RUNNING);
	remove_wait_queue(&ctx->ctx_msgq_wait, &wait);

	if (ret < 0) goto abort;

	ret = -EINVAL;
	msg = pfm_get_next_msg(ctx);
	if (msg == NULL) {
		printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current));
		goto abort_locked;
	}

	DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type));

	ret = -EFAULT;
	if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t);

abort_locked:
	UNPROTECT_CTX(ctx, flags);
abort:
	return ret;
}

static ssize_t
pfm_write(struct file *file, const char __user *ubuf,
			  size_t size, loff_t *ppos)
{
	DPRINT(("pfm_write called\n"));
	return -EINVAL;
}

static __poll_t
pfm_poll(struct file *filp, poll_table * wait)
{
	pfm_context_t *ctx;
	unsigned long flags;
	__poll_t mask = 0;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current));
		return 0;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current));
		return 0;
	}


	DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd));

	poll_wait(filp, &ctx->ctx_msgq_wait, wait);

	PROTECT_CTX(ctx, flags);

	if (PFM_CTXQ_EMPTY(ctx) == 0)
		mask = EPOLLIN | EPOLLRDNORM;

	UNPROTECT_CTX(ctx, flags);

	DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask));

	return mask;
}

static long
pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	DPRINT(("pfm_ioctl called\n"));
	return -EINVAL;
}

/*
 * interrupt cannot be masked when coming here
 */
static inline int
pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on)
{
	int ret;

	ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue);

	DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		task_pid_nr(current),
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}

static int
pfm_fasync(int fd, struct file *filp, int on)
{
	pfm_context_t *ctx;
	int ret;

	if (PFM_IS_FILE(filp) == 0) {
		printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}
	/*
	 * we cannot mask interrupts during this call because this may
	 * go to sleep if memory is not readily available.
	 *
	 * We are protected from the context disappearing by the get_fd()/put_fd()
	 * done in caller. Serialization of this function is ensured by caller.
	 */
	ret = pfm_do_fasync(fd, filp, ctx, on);


	DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n",
		fd,
		on,
		ctx->ctx_async_queue, ret));

	return ret;
}

#ifdef CONFIG_SMP
/*
 * this function is exclusively called from pfm_close().
 * The context is not protected at that time, nor are interrupts
 * on the remote CPU. That's necessary to avoid deadlocks.
 */
static void
pfm_syswide_force_stop(void *info)
{
	pfm_context_t   *ctx = (pfm_context_t *)info;
	struct pt_regs *regs = task_pt_regs(current);
	struct task_struct *owner;
	unsigned long flags;
	int ret;

	if (ctx->ctx_cpu != smp_processor_id()) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n",
			ctx->ctx_cpu,
			smp_processor_id());
		return;
	}
	owner = GET_PMU_OWNER();
	if (owner != ctx->ctx_task) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n",
			smp_processor_id(),
			task_pid_nr(owner), task_pid_nr(ctx->ctx_task));
		return;
	}
	if (GET_PMU_CTX() != ctx) {
		printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n",
			smp_processor_id(),
			GET_PMU_CTX(), ctx);
		return;
	}

	DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task)));
	/*
	 * the context is already protected in pfm_close(), we simply
	 * need to mask interrupts to avoid a PMU interrupt race on
	 * this CPU
	 */
	local_irq_save(flags);

	ret = pfm_context_unload(ctx, NULL, 0, regs);
	if (ret) {
		DPRINT(("context_unload returned %d\n", ret));
	}

	/*
	 * unmask interrupts, PMU interrupts are now spurious here
	 */
	local_irq_restore(flags);
}

static void
pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx)
{
	int ret;

	DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu));
	ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1);
	DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret));
}
#endif /* CONFIG_SMP */

/*
 * called for each close(). Partially free resources.
 * When caller is self-monitoring, the context is unloaded.
 */
static int
pfm_flush(struct file *filp, fl_owner_t id)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_vaddr = NULL;
	int state, is_system;

	if (PFM_IS_FILE(filp) == 0) {
		DPRINT(("bad magic for\n"));
		return -EBADF;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	/*
	 * remove our file from the async queue, if we use this mode.
	 * This can be done without the context being protected. We come
	 * here when the context has become unreachable by other tasks.
	 *
	 * We may still have active monitoring at this point and we may
	 * end up in pfm_overflow_handler(). However, fasync_helper()
	 * operates with interrupts disabled and it cleans up the
	 * queue. If the PMU handler is called prior to entering
	 * fasync_helper() then it will send a signal. If it is
	 * invoked after, it will find an empty queue and no
	 * signal will be sent. In both cases, we are safe
	 */
	PROTECT_CTX(ctx, flags);

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
	regs = task_pt_regs(task);

	DPRINT(("ctx_state=%d is_current=%d\n",
		state,
		task == current ? 1 : 0));
	/*
	 * if state == UNLOADED, then task is NULL
	 */

	/*
	 * we must stop and unload because we are losing access to the context.
	 */
	if (task == current) {
#ifdef CONFIG_SMP
		/*
		 * the task IS the owner but it migrated to another CPU: that's bad
		 * but we must handle this cleanly. Unfortunately, the kernel does
		 * not provide a mechanism to block migration (while the context is loaded).
		 *
		 * We need to release the resource on the ORIGINAL cpu.
		 */
		if (is_system && ctx->ctx_cpu != smp_processor_id()) {

			DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu));
			/*
			 * keep context protected but unmask interrupt for IPI
			 */
			local_irq_restore(flags);

			pfm_syswide_cleanup_other_cpu(ctx);

			/*
			 * restore interrupt masking
			 */
			local_irq_save(flags);

			/*
			 * context is unloaded at this point
			 */
		} else
#endif /* CONFIG_SMP */
		{

			DPRINT(("forcing unload\n"));
			/*
			 * stop and unload, returning with state UNLOADED
			 * and session unreserved.
			 */
			pfm_context_unload(ctx, NULL, 0, regs);

			DPRINT(("ctx_state=%d\n", ctx->ctx_state));
		}
	}

	/*
	 * remove virtual mapping, if any, for the calling task.
	 * cannot reset ctx field until last user is calling close().
	 *
	 * ctx_smpl_vaddr must never be cleared because it is needed
	 * by every task with access to the context
	 *
	 * When called from do_exit(), the mm context is gone already, therefore
	 * mm is NULL, i.e., the VMA is already gone and we do not have to
	 * do anything here
	 */
	if (ctx->ctx_smpl_vaddr && current->mm) {
		smpl_buf_vaddr = ctx->ctx_smpl_vaddr;
		smpl_buf_size  = ctx->ctx_smpl_size;
	}

	UNPROTECT_CTX(ctx, flags);

	/*
	 * if there was a mapping, then we systematically remove it
	 * at this point. Cannot be done inside critical section
	 * because some VM function reenables interrupts.
	 *
	 */
	if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size);

	return 0;
}
/*
 * called either on explicit close() or from exit_files().
 * Only the LAST user of the file gets to this point, i.e., it is
 * called only ONCE.
 *
 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero
 * (fput()), i.e., last task to access the file. Nobody else can access the
 * file at this point.
 *
 * When called from exit_files(), the VMA has been freed because exit_mm()
 * is executed before exit_files().
 *
 * When called from exit_files(), the current task is not yet ZOMBIE but we
 * flush the PMU state to the context.
 */
static int
pfm_close(struct inode *inode, struct file *filp)
{
	pfm_context_t *ctx;
	struct task_struct *task;
	struct pt_regs *regs;
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;
	unsigned long smpl_buf_size = 0UL;
	void *smpl_buf_addr = NULL;
	int free_possible = 1;
	int state, is_system;

	DPRINT(("pfm_close called private=%p\n", filp->private_data));

	if (PFM_IS_FILE(filp) == 0) {
		DPRINT(("bad magic\n"));
		return -EBADF;
	}

	ctx = filp->private_data;
	if (ctx == NULL) {
		printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current));
		return -EBADF;
	}

	PROTECT_CTX(ctx, flags);

	state     = ctx->ctx_state;
	is_system = ctx->ctx_fl_system;

	task = PFM_CTX_TASK(ctx);
	regs = task_pt_regs(task);

	DPRINT(("ctx_state=%d is_current=%d\n",
		state,
		task == current ? 1 : 0));

	/*
	 * if task == current, then pfm_flush() unloaded the context
	 */
	if (state == PFM_CTX_UNLOADED) goto doit;

	/*
	 * context is loaded/masked and task != current, we need to
	 * either force an unload or go zombie
	 */

	/*
	 * The task is currently blocked or will block after an overflow.
	 * we must force it to wake up to get out of the
	 * MASKED state and transition to the unloaded state by itself.
	 *
	 * This situation is only possible for per-task mode
	 */
	if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) {

		/*
		 * set a "partial" zombie state to be checked
		 * upon return from down() in pfm_handle_work().
		 *
		 * We cannot use the ZOMBIE state, because it is checked
		 * by pfm_load_regs() which is called upon wakeup from down().
		 * In such case, it would free the context and then we would
		 * return to pfm_handle_work() which would access the
		 * stale context. Instead, we set a flag invisible to pfm_load_regs()
		 * but visible to pfm_handle_work().
		 *
		 * For some window of time, we have a zombie context with
		 * ctx_state = MASKED  and not ZOMBIE
		 */
		ctx->ctx_fl_going_zombie = 1;

		/*
		 * force task to wake up from MASKED state
		 */
		complete(&ctx->ctx_restart_done);

		DPRINT(("waking up ctx_state=%d\n", state));

		/*
		 * put ourself to sleep waiting for the other
		 * task to report completion
		 *
		 * the context is protected by mutex, therefore there
		 * is no risk of being notified of completion before
		 * being actually on the waitq.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		add_wait_queue(&ctx->ctx_zombieq, &wait);

		UNPROTECT_CTX(ctx, flags);

		/*
		 * XXX: check for signals :
		 * 	- ok for explicit close
		 * 	- not ok when coming from exit_files()
		 */
		schedule();


		PROTECT_CTX(ctx, flags);


		remove_wait_queue(&ctx->ctx_zombieq, &wait);
		set_current_state(TASK_RUNNING);

		/*
		 * context is unloaded at this point
		 */
		DPRINT(("after zombie wakeup ctx_state=%d for\n", state));
	}
	else if (task != current) {
#ifdef CONFIG_SMP
		/*
		 * switch context to zombie state
		 */
		ctx->ctx_state = PFM_CTX_ZOMBIE;

		DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task)));
		/*
		 * cannot free the context on the spot. deferred until
Deferred until 2027 * the task notices the ZOMBIE state 2028 */ 2029 free_possible = 0; 2030 #else 2031 pfm_context_unload(ctx, NULL, 0, regs); 2032 #endif 2033 } 2034 2035 doit: 2036 /* reload state, may have changed during opening of critical section */ 2037 state = ctx->ctx_state; 2038 2039 /* 2040 * the context is still attached to a task (possibly current) 2041 * we cannot destroy it right now 2042 */ 2043 2044 /* 2045 * we must free the sampling buffer right here because 2046 * we cannot rely on it being cleaned up later by the 2047 * monitored task. It is not possible to free vmalloc'ed 2048 * memory in pfm_load_regs(). Instead, we remove the buffer 2049 * now. Should there be a subsequent PMU overflow originally 2050 * meant for sampling, it will be converted to spurious 2051 * and that's fine because the monitoring tool is gone anyway. 2052 */ 2053 if (ctx->ctx_smpl_hdr) { 2054 smpl_buf_addr = ctx->ctx_smpl_hdr; 2055 smpl_buf_size = ctx->ctx_smpl_size; 2056 /* no more sampling */ 2057 ctx->ctx_smpl_hdr = NULL; 2058 ctx->ctx_fl_is_sampling = 0; 2059 } 2060 2061 DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n", 2062 state, 2063 free_possible, 2064 smpl_buf_addr, 2065 smpl_buf_size)); 2066 2067 if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt); 2068 2069 /* 2070 * UNLOADED means that the session has already been unreserved. 2071 */ 2072 if (state == PFM_CTX_ZOMBIE) { 2073 pfm_unreserve_session(ctx, ctx->ctx_fl_system , ctx->ctx_cpu); 2074 } 2075 2076 /* 2077 * disconnecting the file descriptor from the context must be done 2078 * before we unlock. 2079 */ 2080 filp->private_data = NULL; 2081 2082 /* 2083 * if we free on the spot, the context is now completely unreachable 2084 * from the caller's side. The monitored task side is also cut, so we 2085 * can free it safely. 2086 * 2087 * If we have a deferred free, only the caller side is disconnected. 2088 */ 2089 UNPROTECT_CTX(ctx, flags); 2090 2091 /* 2092 * All memory free operations (especially for vmalloc'ed memory) 2093 * MUST be done with interrupts ENABLED.
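* vfree() may sleep, which is why UNPROTECT_CTX() above re-enables interrupts before the buffer is released.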
2094 */ 2095 vfree(smpl_buf_addr); 2096 2097 /* 2098 * return the memory used by the context 2099 */ 2100 if (free_possible) pfm_context_free(ctx); 2101 2102 return 0; 2103 } 2104 2105 static const struct file_operations pfm_file_ops = { 2106 .llseek = no_llseek, 2107 .read = pfm_read, 2108 .write = pfm_write, 2109 .poll = pfm_poll, 2110 .unlocked_ioctl = pfm_ioctl, 2111 .fasync = pfm_fasync, 2112 .release = pfm_close, 2113 .flush = pfm_flush 2114 }; 2115 2116 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) 2117 { 2118 return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", 2119 d_inode(dentry)->i_ino); 2120 } 2121 2122 static const struct dentry_operations pfmfs_dentry_operations = { 2123 .d_delete = always_delete_dentry, 2124 .d_dname = pfmfs_dname, 2125 }; 2126 2127 2128 static struct file * 2129 pfm_alloc_file(pfm_context_t *ctx) 2130 { 2131 struct file *file; 2132 struct inode *inode; 2133 struct path path; 2134 struct qstr this = { .name = "" }; 2135 2136 /* 2137 * allocate a new inode 2138 */ 2139 inode = new_inode(pfmfs_mnt->mnt_sb); 2140 if (!inode) 2141 return ERR_PTR(-ENOMEM); 2142 2143 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); 2144 2145 inode->i_mode = S_IFCHR|S_IRUGO; 2146 inode->i_uid = current_fsuid(); 2147 inode->i_gid = current_fsgid(); 2148 2149 /* 2150 * allocate a new dcache entry 2151 */ 2152 path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); 2153 if (!path.dentry) { 2154 iput(inode); 2155 return ERR_PTR(-ENOMEM); 2156 } 2157 path.mnt = mntget(pfmfs_mnt); 2158 2159 d_add(path.dentry, inode); 2160 2161 file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2162 if (IS_ERR(file)) { 2163 path_put(&path); 2164 return file; 2165 } 2166 2167 file->f_flags = O_RDONLY; 2168 file->private_data = ctx; 2169 2170 return file; 2171 } 2172 2173 static int 2174 pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) 2175 { 2176 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); 2177 2178 while (size > 0) { 2179 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; 2180 2181 2182 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) 2183 return -ENOMEM; 2184 2185 addr += PAGE_SIZE; 2186 buf += PAGE_SIZE; 2187 size -= PAGE_SIZE; 2188 } 2189 return 0; 2190 } 2191 2192 /* 2193 * allocate a sampling buffer and remaps it into the user address space of the task 2194 */ 2195 static int 2196 pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) 2197 { 2198 struct mm_struct *mm = task->mm; 2199 struct vm_area_struct *vma = NULL; 2200 unsigned long size; 2201 void *smpl_buf; 2202 2203 2204 /* 2205 * the fixed header + requested size and align to page boundary 2206 */ 2207 size = PAGE_ALIGN(rsize); 2208 2209 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); 2210 2211 /* 2212 * check requested size to avoid Denial-of-service attacks 2213 * XXX: may have to refine this test 2214 * Check against address space limit. 2215 * 2216 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) 2217 * return -ENOMEM; 2218 */ 2219 if (size > task_rlimit(task, RLIMIT_MEMLOCK)) 2220 return -ENOMEM; 2221 2222 /* 2223 * We do the easy to undo allocations first. 
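* The vzalloc()'ed buffer below can simply be vfree()'d on any later failure, see the error paths at the end of this function.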
2224 */ 2225 smpl_buf = vzalloc(size); 2226 if (smpl_buf == NULL) { 2227 DPRINT(("Can't allocate sampling buffer\n")); 2228 return -ENOMEM; 2229 } 2230 2231 DPRINT(("smpl_buf @%p\n", smpl_buf)); 2232 2233 /* allocate vma */ 2234 vma = vm_area_alloc(mm); 2235 if (!vma) { 2236 DPRINT(("Cannot allocate vma\n")); 2237 goto error_kmem; 2238 } 2239 2240 /* 2241 * partially initialize the vma for the sampling buffer 2242 */ 2243 vma->vm_file = get_file(filp); 2244 vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; 2245 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2246 2247 /* 2248 * Now we have everything we need and we can initialize 2249 * and connect all the data structures 2250 */ 2251 2252 ctx->ctx_smpl_hdr = smpl_buf; 2253 ctx->ctx_smpl_size = size; /* aligned size */ 2254 2255 /* 2256 * Let's do the difficult operations next. 2257 * 2258 * now we atomically find some area in the address space and 2259 * remap the buffer in it. 2260 */ 2261 down_write(&task->mm->mmap_sem); 2262 2263 /* find some free area in address space, must have mmap sem held */ 2264 vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); 2265 if (IS_ERR_VALUE(vma->vm_start)) { 2266 DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2267 up_write(&task->mm->mmap_sem); 2268 goto error; 2269 } 2270 vma->vm_end = vma->vm_start + size; 2271 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2272 2273 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2274 2275 /* can only be applied to current task, need to have the mm semaphore held when called */ 2276 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2277 DPRINT(("Can't remap buffer\n")); 2278 up_write(&task->mm->mmap_sem); 2279 goto error; 2280 } 2281 2282 /* 2283 * now insert the vma in the vm list for the process, must be 2284 * done with mmap lock held 2285 */ 2286 insert_vm_struct(mm, vma); 2287 2288 vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma)); 2289 up_write(&task->mm->mmap_sem); 2290 2291 /* 2292 * keep track of user level virtual address 2293 */ 2294 ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2295 *(unsigned long *)user_vaddr = vma->vm_start; 2296 2297 return 0; 2298 2299 error: 2300 vm_area_free(vma); 2301 error_kmem: 2302 vfree(smpl_buf); 2303 2304 return -ENOMEM; 2305 } 2306 2307 /* 2308 * XXX: do something better here 2309 */ 2310 static int 2311 pfm_bad_permissions(struct task_struct *task) 2312 { 2313 const struct cred *tcred; 2314 kuid_t uid = current_uid(); 2315 kgid_t gid = current_gid(); 2316 int ret; 2317 2318 rcu_read_lock(); 2319 tcred = __task_cred(task); 2320 2321 /* inspired by ptrace_attach() */ 2322 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2323 from_kuid(&init_user_ns, uid), 2324 from_kgid(&init_user_ns, gid), 2325 from_kuid(&init_user_ns, tcred->euid), 2326 from_kuid(&init_user_ns, tcred->suid), 2327 from_kuid(&init_user_ns, tcred->uid), 2328 from_kgid(&init_user_ns, tcred->egid), 2329 from_kgid(&init_user_ns, tcred->sgid))); 2330 2331 ret = ((!uid_eq(uid, tcred->euid)) 2332 || (!uid_eq(uid, tcred->suid)) 2333 || (!uid_eq(uid, tcred->uid)) 2334 || (!gid_eq(gid, tcred->egid)) 2335 || (!gid_eq(gid, tcred->sgid)) 2336 || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); 2337 2338 rcu_read_unlock(); 2339 return ret; 2340 } 2341 2342 static int 2343 pfarg_is_sane(struct task_struct *task, pfarg_context_t *pfx) 2344 { 2345 int ctx_flags; 2346 2347 /* valid signal */ 2348 
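/* currently only ctx_flags is sanity-checked */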
2349 ctx_flags = pfx->ctx_flags; 2350 2351 if (ctx_flags & PFM_FL_SYSTEM_WIDE) { 2352 2353 /* 2354 * cannot block in this mode 2355 */ 2356 if (ctx_flags & PFM_FL_NOTIFY_BLOCK) { 2357 DPRINT(("cannot use blocking mode when in system wide monitoring\n")); 2358 return -EINVAL; 2359 } 2360 } 2361 2362 /* probably more to add here */ 2363 2364 return 0; 2365 } 2366 2367 static int 2368 pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags, 2369 unsigned int cpu, pfarg_context_t *arg) 2370 { 2371 pfm_buffer_fmt_t *fmt = NULL; 2372 unsigned long size = 0UL; 2373 void *uaddr = NULL; 2374 void *fmt_arg = NULL; 2375 int ret = 0; 2376 #define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1) 2377 2378 /* invoke and lock buffer format, if found */ 2379 fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id); 2380 if (fmt == NULL) { 2381 DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task))); 2382 return -EINVAL; 2383 } 2384 2385 /* 2386 * buffer argument MUST be contiguous to pfarg_context_t 2387 */ 2388 if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg); 2389 2390 ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg); 2391 2392 DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret)); 2393 2394 if (ret) goto error; 2395 2396 /* link buffer format and context */ 2397 ctx->ctx_buf_fmt = fmt; 2398 ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */ 2399 2400 /* 2401 * check if buffer format wants to use perfmon buffer allocation/mapping service 2402 */ 2403 ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size); 2404 if (ret) goto error; 2405 2406 if (size) { 2407 /* 2408 * buffer is always remapped into the caller's address space 2409 */ 2410 ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr); 2411 if (ret) goto error; 2412 2413 /* keep track of user address of buffer */ 2414 arg->ctx_smpl_vaddr = uaddr; 2415 } 2416 ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg); 2417 2418 error: 2419 return ret; 2420 } 2421 2422 static void 2423 pfm_reset_pmu_state(pfm_context_t *ctx) 2424 { 2425 int i; 2426 2427 /* 2428 * install reset values for PMCs. 2429 */ 2430 for (i=1; PMC_IS_LAST(i) == 0; i++) { 2431 if (PMC_IS_IMPL(i) == 0) continue; 2432 ctx->ctx_pmcs[i] = PMC_DFL_VAL(i); 2433 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i])); 2434 } 2435 /* 2436 * PMD registers are set to 0UL when the context is memset() 2437 */ 2438 2439 /* 2440 * On context switch restore, we must restore ALL pmc and ALL pmd even 2441 * when they are not actively used by the task. In UP, the incoming process 2442 * may otherwise pick up left over PMC, PMD state from the previous process. 2443 * As opposed to PMDs, stale PMCs can cause harm to the incoming 2444 * process because they may change what is being measured. 2445 * Therefore, we must systematically reinstall the entire 2446 * PMC state. In SMP, the same thing is possible on the 2447 * same CPU but also between 2 CPUs. 2448 * 2449 * The problem with PMDs is information leaking, especially 2450 * to user level when psr.sp=0 2451 * 2452 * There is unfortunately no easy way to avoid this problem 2453 * on either UP or SMP. This definitely slows down the 2454 * pfm_load_regs() function. 2455 */ 2456 2457 /* 2458 * bitmask of all PMCs accessible to this context 2459 * 2460 * PMC0 is treated differently.
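* PMC0 is one of the PMU status registers (see pfm_write_pmcs()), so it is never exposed to the context; hence the ~0x1 mask below.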
2461 */ 2462 ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1; 2463 2464 /* 2465 * bitmask of all PMDs that are accessible to this context 2466 */ 2467 ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0]; 2468 2469 DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0],ctx->ctx_all_pmds[0])); 2470 2471 /* 2472 * useful in case of re-enable after disable 2473 */ 2474 ctx->ctx_used_ibrs[0] = 0UL; 2475 ctx->ctx_used_dbrs[0] = 0UL; 2476 } 2477 2478 static int 2479 pfm_ctx_getsize(void *arg, size_t *sz) 2480 { 2481 pfarg_context_t *req = (pfarg_context_t *)arg; 2482 pfm_buffer_fmt_t *fmt; 2483 2484 *sz = 0; 2485 2486 if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0; 2487 2488 fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id); 2489 if (fmt == NULL) { 2490 DPRINT(("cannot find buffer format\n")); 2491 return -EINVAL; 2492 } 2493 /* get just enough to copy in user parameters */ 2494 *sz = fmt->fmt_arg_size; 2495 DPRINT(("arg_size=%lu\n", *sz)); 2496 2497 return 0; 2498 } 2499 2500 2501 2502 /* 2503 * cannot attach if: 2504 * - kernel task 2505 * - task not owned by caller 2506 * - task incompatible with context mode 2507 */ 2508 static int 2509 pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) 2510 { 2511 /* 2512 * no kernel task or task not owned by caller 2513 */ 2514 if (task->mm == NULL) { 2515 DPRINT(("task [%d] has no memory context (kernel thread)\n", task_pid_nr(task))); 2516 return -EPERM; 2517 } 2518 if (pfm_bad_permissions(task)) { 2519 DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task))); 2520 return -EPERM; 2521 } 2522 /* 2523 * cannot block in self-monitoring mode 2524 */ 2525 if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) { 2526 DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task))); 2527 return -EINVAL; 2528 } 2529 2530 if (task->exit_state == EXIT_ZOMBIE) { 2531 DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task))); 2532 return -EBUSY; 2533 } 2534 2535 /* 2536 * always ok for self 2537 */ 2538 if (task == current) return 0; 2539 2540 if (!task_is_stopped_or_traced(task)) { 2541 DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state)); 2542 return -EBUSY; 2543 } 2544 /* 2545 * make sure the task is off any CPU 2546 */ 2547 wait_task_inactive(task, 0); 2548 2549 /* more to come...
*/ 2550 2551 return 0; 2552 } 2553 2554 static int 2555 pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) 2556 { 2557 struct task_struct *p = current; 2558 int ret; 2559 2560 /* XXX: need to add more checks here */ 2561 if (pid < 2) return -EPERM; 2562 2563 if (pid != task_pid_vnr(current)) { 2564 /* make sure task cannot go away while we operate on it */ 2565 p = find_get_task_by_vpid(pid); 2566 if (!p) 2567 return -ESRCH; 2568 } 2569 2570 ret = pfm_task_incompatible(ctx, p); 2571 if (ret == 0) { 2572 *task = p; 2573 } else if (p != current) { 2574 pfm_put_task(p); 2575 } 2576 return ret; 2577 } 2578 2579 2580 2581 static int 2582 pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2583 { 2584 pfarg_context_t *req = (pfarg_context_t *)arg; 2585 struct file *filp; 2586 struct path path; 2587 int ctx_flags; 2588 int fd; 2589 int ret; 2590 2591 /* let's check the arguments first */ 2592 ret = pfarg_is_sane(current, req); 2593 if (ret < 0) 2594 return ret; 2595 2596 ctx_flags = req->ctx_flags; 2597 2598 ret = -ENOMEM; 2599 2600 fd = get_unused_fd_flags(0); 2601 if (fd < 0) 2602 return fd; 2603 2604 ctx = pfm_context_alloc(ctx_flags); 2605 if (!ctx) 2606 goto error; 2607 2608 filp = pfm_alloc_file(ctx); 2609 if (IS_ERR(filp)) { 2610 ret = PTR_ERR(filp); 2611 goto error_file; 2612 } 2613 2614 req->ctx_fd = ctx->ctx_fd = fd; 2615 2616 /* 2617 * does the user want to sample? 2618 */ 2619 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { 2620 ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); 2621 if (ret) 2622 goto buffer_error; 2623 } 2624 2625 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", 2626 ctx, 2627 ctx_flags, 2628 ctx->ctx_fl_system, 2629 ctx->ctx_fl_block, 2630 ctx->ctx_fl_excl_idle, 2631 ctx->ctx_fl_no_msg, 2632 ctx->ctx_fd)); 2633 2634 /* 2635 * initialize soft PMU state 2636 */ 2637 pfm_reset_pmu_state(ctx); 2638 2639 fd_install(fd, filp); 2640 2641 return 0; 2642 2643 buffer_error: 2644 path = filp->f_path; 2645 put_filp(filp); 2646 path_put(&path); 2647 2648 if (ctx->ctx_buf_fmt) { 2649 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); 2650 } 2651 error_file: 2652 pfm_context_free(ctx); 2653 2654 error: 2655 put_unused_fd(fd); 2656 return ret; 2657 } 2658 2659 static inline unsigned long 2660 pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) 2661 { 2662 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; 2663 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; 2664 extern unsigned long carta_random32 (unsigned long seed); 2665 2666 if (reg->flags & PFM_REGFL_RANDOM) { 2667 new_seed = carta_random32(old_seed); 2668 val -= (old_seed & mask); /* counter values are negative numbers! 
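so subtracting a (seed & mask) random offset from the reset value randomizes the distance to the next overflow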
*/ 2669 if ((mask >> 32) != 0) 2670 /* construct a full 64-bit random value: */ 2671 new_seed |= carta_random32(old_seed >> 32) << 32; 2672 reg->seed = new_seed; 2673 } 2674 reg->lval = val; 2675 return val; 2676 } 2677 2678 static void 2679 pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2680 { 2681 unsigned long mask = ovfl_regs[0]; 2682 unsigned long reset_others = 0UL; 2683 unsigned long val; 2684 int i; 2685 2686 /* 2687 * now restore reset value on sampling overflowed counters 2688 */ 2689 mask >>= PMU_FIRST_COUNTER; 2690 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2691 2692 if ((mask & 0x1UL) == 0UL) continue; 2693 2694 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2695 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2696 2697 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2698 } 2699 2700 /* 2701 * Now take care of resetting the other registers 2702 */ 2703 for(i = 0; reset_others; i++, reset_others >>= 1) { 2704 2705 if ((reset_others & 0x1) == 0) continue; 2706 2707 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2708 2709 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2710 is_long_reset ? "long" : "short", i, val)); 2711 } 2712 } 2713 2714 static void 2715 pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2716 { 2717 unsigned long mask = ovfl_regs[0]; 2718 unsigned long reset_others = 0UL; 2719 unsigned long val; 2720 int i; 2721 2722 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2723 2724 if (ctx->ctx_state == PFM_CTX_MASKED) { 2725 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2726 return; 2727 } 2728 2729 /* 2730 * now restore reset value on sampling overflowed counters 2731 */ 2732 mask >>= PMU_FIRST_COUNTER; 2733 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2734 2735 if ((mask & 0x1UL) == 0UL) continue; 2736 2737 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2738 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2739 2740 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2741 2742 pfm_write_soft_counter(ctx, i, val); 2743 } 2744 2745 /* 2746 * Now take care of resetting the other registers 2747 */ 2748 for(i = 0; reset_others; i++, reset_others >>= 1) { 2749 2750 if ((reset_others & 0x1) == 0) continue; 2751 2752 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2753 2754 if (PMD_IS_COUNTING(i)) { 2755 pfm_write_soft_counter(ctx, i, val); 2756 } else { 2757 ia64_set_pmd(i, val); 2758 } 2759 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2760 is_long_reset ? "long" : "short", i, val)); 2761 } 2762 ia64_srlz_d(); 2763 } 2764 2765 static int 2766 pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2767 { 2768 struct task_struct *task; 2769 pfarg_reg_t *req = (pfarg_reg_t *)arg; 2770 unsigned long value, pmc_pm; 2771 unsigned long smpl_pmds, reset_pmds, impl_pmds; 2772 unsigned int cnum, reg_flags, flags, pmc_type; 2773 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2774 int is_monitor, is_counting, state; 2775 int ret = -EINVAL; 2776 pfm_reg_check_t wr_func; 2777 #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2778 2779 state = ctx->ctx_state; 2780 is_loaded = state == PFM_CTX_LOADED ? 
1 : 0; 2781 is_system = ctx->ctx_fl_system; 2782 task = ctx->ctx_task; 2783 impl_pmds = pmu_conf->impl_pmds[0]; 2784 2785 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 2786 2787 if (is_loaded) { 2788 /* 2789 * In system wide and when the context is loaded, access can only happen 2790 * when the caller is running on the CPU being monitored by the session. 2791 * It does not have to be the owner (ctx_task) of the context per se. 2792 */ 2793 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 2794 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 2795 return -EBUSY; 2796 } 2797 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 2798 } 2799 expert_mode = pfm_sysctl.expert_mode; 2800 2801 for (i = 0; i < count; i++, req++) { 2802 2803 cnum = req->reg_num; 2804 reg_flags = req->reg_flags; 2805 value = req->reg_value; 2806 smpl_pmds = req->reg_smpl_pmds[0]; 2807 reset_pmds = req->reg_reset_pmds[0]; 2808 flags = 0; 2809 2810 2811 if (cnum >= PMU_MAX_PMCS) { 2812 DPRINT(("pmc%u is invalid\n", cnum)); 2813 goto error; 2814 } 2815 2816 pmc_type = pmu_conf->pmc_desc[cnum].type; 2817 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; 2818 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; 2819 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; 2820 2821 /* 2822 * we reject all non implemented PMC as well 2823 * as attempts to modify PMC[0-3] which are used 2824 * as status registers by the PMU 2825 */ 2826 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { 2827 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); 2828 goto error; 2829 } 2830 wr_func = pmu_conf->pmc_desc[cnum].write_check; 2831 /* 2832 * If the PMC is a monitor, then if the value is not the default: 2833 * - system-wide session: PMCx.pm=1 (privileged monitor) 2834 * - per-task : PMCx.pm=0 (user monitor) 2835 */ 2836 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { 2837 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", 2838 cnum, 2839 pmc_pm, 2840 is_system)); 2841 goto error; 2842 } 2843 2844 if (is_counting) { 2845 /* 2846 * enforce generation of overflow interrupt. Necessary on all 2847 * CPUs. 
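* The overflow interrupt is what allows perfmon to maintain the 64-bit software view of each counter (ctx_pmds[].val).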
2848 */ 2849 value |= 1 << PMU_PMC_OI; 2850 2851 if (reg_flags & PFM_REGFL_OVFL_NOTIFY) { 2852 flags |= PFM_REGFL_OVFL_NOTIFY; 2853 } 2854 2855 if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM; 2856 2857 /* verify validity of smpl_pmds */ 2858 if ((smpl_pmds & impl_pmds) != smpl_pmds) { 2859 DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum)); 2860 goto error; 2861 } 2862 2863 /* verify validity of reset_pmds */ 2864 if ((reset_pmds & impl_pmds) != reset_pmds) { 2865 DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum)); 2866 goto error; 2867 } 2868 } else { 2869 if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) { 2870 DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum)); 2871 goto error; 2872 } 2873 /* eventid on non-counting monitors is ignored */ 2874 } 2875 2876 /* 2877 * execute write checker, if any 2878 */ 2879 if (likely(expert_mode == 0 && wr_func)) { 2880 ret = (*wr_func)(task, ctx, cnum, &value, regs); 2881 if (ret) goto error; 2882 ret = -EINVAL; 2883 } 2884 2885 /* 2886 * no error on this register 2887 */ 2888 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 2889 2890 /* 2891 * Now we commit the changes to the software state 2892 */ 2893 2894 /* 2895 * update overflow information 2896 */ 2897 if (is_counting) { 2898 /* 2899 * full flag update each time a register is programmed 2900 */ 2901 ctx->ctx_pmds[cnum].flags = flags; 2902 2903 ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds; 2904 ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds; 2905 ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid; 2906 2907 /* 2908 * Mark all PMDs to be accessed as used. 2909 * 2910 * We do not keep track of PMC because we have to 2911 * systematically restore ALL of them. 2912 * 2913 * We do not update the used_monitors mask, because 2914 * if we have not programmed them, then they will be in 2915 * a quiescent state, therefore we will not need to 2916 * mask/restore them when the context is MASKED. 2917 */ 2918 CTX_USED_PMD(ctx, reset_pmds); 2919 CTX_USED_PMD(ctx, smpl_pmds); 2920 /* 2921 * make sure we do not try to reset on 2922 * restart because we have established new values 2923 */ 2924 if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 2925 } 2926 /* 2927 * Needed in case the user does not initialize the equivalent 2928 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no 2929 * possible leak here. 2930 */ 2931 CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]); 2932 2933 /* 2934 * keep track of the monitor PMC that we are using. 2935 * we save the value of the pmc in ctx_pmcs[] and if 2936 * the monitoring is not stopped for the context we also 2937 * place it in the saved state area so that it will be 2938 * picked up later by the context switch code. 2939 * 2940 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs(). 2941 * 2942 * The value in th_pmcs[] may be modified on overflow, i.e., when 2943 * monitoring needs to be stopped.
2944 */ 2945 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 2946 2947 /* 2948 * update context state 2949 */ 2950 ctx->ctx_pmcs[cnum] = value; 2951 2952 if (is_loaded) { 2953 /* 2954 * write thread state 2955 */ 2956 if (is_system == 0) ctx->th_pmcs[cnum] = value; 2957 2958 /* 2959 * write hardware register if we can 2960 */ 2961 if (can_access_pmu) { 2962 ia64_set_pmc(cnum, value); 2963 } 2964 #ifdef CONFIG_SMP 2965 else { 2966 /* 2967 * per-task SMP only here 2968 * 2969 * we are guaranteed that the task is not running on the other CPU, 2970 * we indicate that this PMD will need to be reloaded if the task 2971 * is rescheduled on the CPU it ran last on. 2972 */ 2973 ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 2974 } 2975 #endif 2976 } 2977 2978 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 2979 cnum, 2980 value, 2981 is_loaded, 2982 can_access_pmu, 2983 flags, 2984 ctx->ctx_all_pmcs[0], 2985 ctx->ctx_used_pmds[0], 2986 ctx->ctx_pmds[cnum].eventid, 2987 smpl_pmds, 2988 reset_pmds, 2989 ctx->ctx_reload_pmcs[0], 2990 ctx->ctx_used_monitors[0], 2991 ctx->ctx_ovfl_regs[0])); 2992 } 2993 2994 /* 2995 * make sure the changes are visible 2996 */ 2997 if (can_access_pmu) ia64_srlz_d(); 2998 2999 return 0; 3000 error: 3001 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3002 return ret; 3003 } 3004 3005 static int 3006 pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3007 { 3008 struct task_struct *task; 3009 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3010 unsigned long value, hw_value, ovfl_mask; 3011 unsigned int cnum; 3012 int i, can_access_pmu = 0, state; 3013 int is_counting, is_loaded, is_system, expert_mode; 3014 int ret = -EINVAL; 3015 pfm_reg_check_t wr_func; 3016 3017 3018 state = ctx->ctx_state; 3019 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3020 is_system = ctx->ctx_fl_system; 3021 ovfl_mask = pmu_conf->ovfl_val; 3022 task = ctx->ctx_task; 3023 3024 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3025 3026 /* 3027 * on both UP and SMP, we can only write to the PMC when the task is 3028 * the owner of the local PMU. 3029 */ 3030 if (likely(is_loaded)) { 3031 /* 3032 * In system wide and when the context is loaded, access can only happen 3033 * when the caller is running on the CPU being monitored by the session. 3034 * It does not have to be the owner (ctx_task) of the context per se. 3035 */ 3036 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3037 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3038 return -EBUSY; 3039 } 3040 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3041 } 3042 expert_mode = pfm_sysctl.expert_mode; 3043 3044 for (i = 0; i < count; i++, req++) { 3045 3046 cnum = req->reg_num; 3047 value = req->reg_value; 3048 3049 if (!PMD_IS_IMPL(cnum)) { 3050 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3051 goto abort_mission; 3052 } 3053 is_counting = PMD_IS_COUNTING(cnum); 3054 wr_func = pmu_conf->pmd_desc[cnum].write_check; 3055 3056 /* 3057 * execute write checker, if any 3058 */ 3059 if (unlikely(expert_mode == 0 && wr_func)) { 3060 unsigned long v = value; 3061 3062 ret = (*wr_func)(task, ctx, cnum, &v, regs); 3063 if (ret) goto abort_mission; 3064 3065 value = v; 3066 ret = -EINVAL; 3067 } 3068 3069 /* 3070 * no error on this register 3071 */ 3072 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3073 3074 /* 3075 * now commit changes to software state 3076 */ 3077 hw_value = value; 3078 3079 /* 3080 * update virtualized (64bits) counter 3081 */ 3082 if (is_counting) { 3083 /* 3084 * write context state 3085 */ 3086 ctx->ctx_pmds[cnum].lval = value; 3087 3088 /* 3089 * when context is load we use the split value 3090 */ 3091 if (is_loaded) { 3092 hw_value = value & ovfl_mask; 3093 value = value & ~ovfl_mask; 3094 } 3095 } 3096 /* 3097 * update reset values (not just for counters) 3098 */ 3099 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3100 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3101 3102 /* 3103 * update randomization parameters (not just for counters) 3104 */ 3105 ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3106 ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3107 3108 /* 3109 * update context value 3110 */ 3111 ctx->ctx_pmds[cnum].val = value; 3112 3113 /* 3114 * Keep track of what we use 3115 * 3116 * We do not keep track of PMC because we have to 3117 * systematically restore ALL of them. 3118 */ 3119 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3120 3121 /* 3122 * mark this PMD register used as well 3123 */ 3124 CTX_USED_PMD(ctx, RDEP(cnum)); 3125 3126 /* 3127 * make sure we do not try to reset on 3128 * restart because we have established new values 3129 */ 3130 if (is_counting && state == PFM_CTX_MASKED) { 3131 ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3132 } 3133 3134 if (is_loaded) { 3135 /* 3136 * write thread state 3137 */ 3138 if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3139 3140 /* 3141 * write hardware register if we can 3142 */ 3143 if (can_access_pmu) { 3144 ia64_set_pmd(cnum, hw_value); 3145 } else { 3146 #ifdef CONFIG_SMP 3147 /* 3148 * we are guaranteed that the task is not running on the other CPU, 3149 * we indicate that this PMD will need to be reloaded if the task 3150 * is rescheduled on the CPU it ran last on. 3151 */ 3152 ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3153 #endif 3154 } 3155 } 3156 3157 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3158 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3159 cnum, 3160 value, 3161 is_loaded, 3162 can_access_pmu, 3163 hw_value, 3164 ctx->ctx_pmds[cnum].val, 3165 ctx->ctx_pmds[cnum].short_reset, 3166 ctx->ctx_pmds[cnum].long_reset, 3167 PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', 3168 ctx->ctx_pmds[cnum].seed, 3169 ctx->ctx_pmds[cnum].mask, 3170 ctx->ctx_used_pmds[0], 3171 ctx->ctx_pmds[cnum].reset_pmds[0], 3172 ctx->ctx_reload_pmds[0], 3173 ctx->ctx_all_pmds[0], 3174 ctx->ctx_ovfl_regs[0])); 3175 } 3176 3177 /* 3178 * make changes visible 3179 */ 3180 if (can_access_pmu) ia64_srlz_d(); 3181 3182 return 0; 3183 3184 abort_mission: 3185 /* 3186 * for now, we have only one possibility for error 3187 */ 3188 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3189 return ret; 3190 } 3191 3192 /* 3193 * By way of PROTECT_CTX(), interrupts are masked while we are in this function. 3194 * Therefore we know we do not have to worry about the PMU overflow interrupt. If an 3195 * interrupt is delivered during the call, it will be kept pending until we leave, making 3196 * it appear as if it had been generated at the UNPROTECT_CTX(). At least we are 3197 * guaranteed to return consistent data to the user, though it may simply be old. It is not 3198 * trivial to treat the overflow while inside the call because you may end up in 3199 * some module sampling buffer code causing deadlocks. 3200 */ 3201 static int 3202 pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3203 { 3204 struct task_struct *task; 3205 unsigned long val = 0UL, lval, ovfl_mask, sval; 3206 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3207 unsigned int cnum, reg_flags = 0; 3208 int i, can_access_pmu = 0, state; 3209 int is_loaded, is_system, is_counting, expert_mode; 3210 int ret = -EINVAL; 3211 pfm_reg_check_t rd_func; 3212 3213 /* 3214 * access is possible when loaded only for 3215 * self-monitoring tasks or in UP mode 3216 */ 3217 3218 state = ctx->ctx_state; 3219 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3220 is_system = ctx->ctx_fl_system; 3221 ovfl_mask = pmu_conf->ovfl_val; 3222 task = ctx->ctx_task; 3223 3224 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3225 3226 if (likely(is_loaded)) { 3227 /* 3228 * In system wide and when the context is loaded, access can only happen 3229 * when the caller is running on the CPU being monitored by the session. 3230 * It does not have to be the owner (ctx_task) of the context per se. 3231 */ 3232 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3233 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3234 return -EBUSY; 3235 } 3236 /* 3237 * this can be true when not self-monitoring, but only in UP 3238 */ 3239 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3240 3241 if (can_access_pmu) ia64_srlz_d(); 3242 } 3243 expert_mode = pfm_sysctl.expert_mode; 3244 3245 DPRINT(("ld=%d apmu=%d ctx_state=%d\n", 3246 is_loaded, 3247 can_access_pmu, 3248 state)); 3249 3250 /* 3251 * on both UP and SMP, we can only read the PMD from the hardware register when 3252 * the task is the owner of the local PMU. 3253 */ 3254 3255 for (i = 0; i < count; i++, req++) { 3256 3257 cnum = req->reg_num; 3258 reg_flags = req->reg_flags; 3259 3260 if (unlikely(!PMD_IS_IMPL(cnum))) goto error; 3261 /* 3262 * we can only read the registers that we use. That includes 3263 * the ones we explicitly initialize AND the ones we want included 3264 * in the sampling buffer (smpl_regs).
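* In code terms, CTX_IS_USED_PMD() must be true for the register, see the check below.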
3265 * 3266 * Having this restriction allows optimization in the ctxsw routine 3267 * without compromising security (leaks) 3268 */ 3269 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; 3270 3271 sval = ctx->ctx_pmds[cnum].val; 3272 lval = ctx->ctx_pmds[cnum].lval; 3273 is_counting = PMD_IS_COUNTING(cnum); 3274 3275 /* 3276 * If the task is not the current one, then we check if the 3277 * PMU state is still in the local live register due to lazy ctxsw. 3278 * If true, then we read directly from the registers. 3279 */ 3280 if (can_access_pmu){ 3281 val = ia64_get_pmd(cnum); 3282 } else { 3283 /* 3284 * context has been saved 3285 * if context is zombie, then task does not exist anymore. 3286 * In this case, we use the full value saved in the context (pfm_flush_regs()). 3287 */ 3288 val = is_loaded ? ctx->th_pmds[cnum] : 0UL; 3289 } 3290 rd_func = pmu_conf->pmd_desc[cnum].read_check; 3291 3292 if (is_counting) { 3293 /* 3294 * XXX: need to check for overflow when loaded 3295 */ 3296 val &= ovfl_mask; 3297 val += sval; 3298 } 3299 3300 /* 3301 * execute read checker, if any 3302 */ 3303 if (unlikely(expert_mode == 0 && rd_func)) { 3304 unsigned long v = val; 3305 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); 3306 if (ret) goto error; 3307 val = v; 3308 ret = -EINVAL; 3309 } 3310 3311 PFM_REG_RETFLAG_SET(reg_flags, 0); 3312 3313 DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); 3314 3315 /* 3316 * update register return value, abort all if problem during copy. 3317 * we only modify the reg_flags field. no check mode is fine because 3318 * access has been verified upfront in sys_perfmonctl(). 3319 */ 3320 req->reg_value = val; 3321 req->reg_flags = reg_flags; 3322 req->reg_last_reset_val = lval; 3323 } 3324 3325 return 0; 3326 3327 error: 3328 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3329 return ret; 3330 } 3331 3332 int 3333 pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3334 { 3335 pfm_context_t *ctx; 3336 3337 if (req == NULL) return -EINVAL; 3338 3339 ctx = GET_PMU_CTX(); 3340 3341 if (ctx == NULL) return -EINVAL; 3342 3343 /* 3344 * for now limit to current task, which is enough when calling 3345 * from overflow handler 3346 */ 3347 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3348 3349 return pfm_write_pmcs(ctx, req, nreq, regs); 3350 } 3351 EXPORT_SYMBOL(pfm_mod_write_pmcs); 3352 3353 int 3354 pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3355 { 3356 pfm_context_t *ctx; 3357 3358 if (req == NULL) return -EINVAL; 3359 3360 ctx = GET_PMU_CTX(); 3361 3362 if (ctx == NULL) return -EINVAL; 3363 3364 /* 3365 * for now limit to current task, which is enough when calling 3366 * from overflow handler 3367 */ 3368 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3369 3370 return pfm_read_pmds(ctx, req, nreq, regs); 3371 } 3372 EXPORT_SYMBOL(pfm_mod_read_pmds); 3373 3374 /* 3375 * Only call this function when a process it trying to 3376 * write the debug registers (reading is always allowed) 3377 */ 3378 int 3379 pfm_use_debug_registers(struct task_struct *task) 3380 { 3381 pfm_context_t *ctx = task->thread.pfm_context; 3382 unsigned long flags; 3383 int ret = 0; 3384 3385 if (pmu_conf->use_rr_dbregs == 0) return 0; 3386 3387 DPRINT(("called for [%d]\n", task_pid_nr(task))); 3388 3389 /* 3390 * do it only once 3391 */ 3392 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; 3393 3394 /* 3395 * Even on SMP, we do not need to use an atomic 
here because 3396 * the only way in is via ptrace() and this is possible only when the 3397 * process is stopped. Even in the case where the ctxsw out is not totally 3398 * completed by the time we come here, there is no way the 'stopped' process 3399 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3400 * So this is always safe. 3401 */ 3402 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3403 3404 LOCK_PFS(flags); 3405 3406 /* 3407 * We cannot allow setting breakpoints when system wide monitoring 3408 * sessions are using the debug registers. 3409 */ 3410 if (pfm_sessions.pfs_sys_use_dbregs> 0) 3411 ret = -1; 3412 else 3413 pfm_sessions.pfs_ptrace_use_dbregs++; 3414 3415 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3416 pfm_sessions.pfs_ptrace_use_dbregs, 3417 pfm_sessions.pfs_sys_use_dbregs, 3418 task_pid_nr(task), ret)); 3419 3420 UNLOCK_PFS(flags); 3421 3422 return ret; 3423 } 3424 3425 /* 3426 * This function is called for every task that exits with the 3427 * IA64_THREAD_DBG_VALID set. This indicates a task which was 3428 * able to use the debug registers for debugging purposes via 3429 * ptrace(). Therefore we know it was not using them for 3430 * performance monitoring, so we only decrement the number 3431 * of "ptraced" debug register users to keep the count up to date 3432 */ 3433 int 3434 pfm_release_debug_registers(struct task_struct *task) 3435 { 3436 unsigned long flags; 3437 int ret; 3438 3439 if (pmu_conf->use_rr_dbregs == 0) return 0; 3440 3441 LOCK_PFS(flags); 3442 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3443 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3444 ret = -1; 3445 } else { 3446 pfm_sessions.pfs_ptrace_use_dbregs--; 3447 ret = 0; 3448 } 3449 UNLOCK_PFS(flags); 3450 3451 return ret; 3452 } 3453 3454 static int 3455 pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3456 { 3457 struct task_struct *task; 3458 pfm_buffer_fmt_t *fmt; 3459 pfm_ovfl_ctrl_t rst_ctrl; 3460 int state, is_system; 3461 int ret = 0; 3462 3463 state = ctx->ctx_state; 3464 fmt = ctx->ctx_buf_fmt; 3465 is_system = ctx->ctx_fl_system; 3466 task = PFM_CTX_TASK(ctx); 3467 3468 switch(state) { 3469 case PFM_CTX_MASKED: 3470 break; 3471 case PFM_CTX_LOADED: 3472 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3473 /* fall through */ 3474 case PFM_CTX_UNLOADED: 3475 case PFM_CTX_ZOMBIE: 3476 DPRINT(("invalid state=%d\n", state)); 3477 return -EBUSY; 3478 default: 3479 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3480 return -EINVAL; 3481 } 3482 3483 /* 3484 * In system wide and when the context is loaded, access can only happen 3485 * when the caller is running on the CPU being monitored by the session. 3486 * It does not have to be the owner (ctx_task) of the context per se. 
3487 */ 3488 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3489 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3490 return -EBUSY; 3491 } 3492 3493 /* sanity check */ 3494 if (unlikely(task == NULL)) { 3495 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3496 return -EINVAL; 3497 } 3498 3499 if (task == current || is_system) { 3500 3501 fmt = ctx->ctx_buf_fmt; 3502 3503 DPRINT(("restarting self %d ovfl=0x%lx\n", 3504 task_pid_nr(task), 3505 ctx->ctx_ovfl_regs[0])); 3506 3507 if (CTX_HAS_SMPL(ctx)) { 3508 3509 prefetch(ctx->ctx_smpl_hdr); 3510 3511 rst_ctrl.bits.mask_monitoring = 0; 3512 rst_ctrl.bits.reset_ovfl_pmds = 0; 3513 3514 if (state == PFM_CTX_LOADED) 3515 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3516 else 3517 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3518 } else { 3519 rst_ctrl.bits.mask_monitoring = 0; 3520 rst_ctrl.bits.reset_ovfl_pmds = 1; 3521 } 3522 3523 if (ret == 0) { 3524 if (rst_ctrl.bits.reset_ovfl_pmds) 3525 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3526 3527 if (rst_ctrl.bits.mask_monitoring == 0) { 3528 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3529 3530 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3531 } else { 3532 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3533 3534 // cannot use pfm_stop_monitoring(task, regs); 3535 } 3536 } 3537 /* 3538 * clear overflowed PMD mask to remove any stale information 3539 */ 3540 ctx->ctx_ovfl_regs[0] = 0UL; 3541 3542 /* 3543 * back to LOADED state 3544 */ 3545 ctx->ctx_state = PFM_CTX_LOADED; 3546 3547 /* 3548 * XXX: not really useful for self monitoring 3549 */ 3550 ctx->ctx_fl_can_restart = 0; 3551 3552 return 0; 3553 } 3554 3555 /* 3556 * restart another task 3557 */ 3558 3559 /* 3560 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3561 * one is seen by the task. 3562 */ 3563 if (state == PFM_CTX_MASKED) { 3564 if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3565 /* 3566 * will prevent subsequent restart before this one is 3567 * seen by the other task 3568 */ 3569 ctx->ctx_fl_can_restart = 0; 3570 } 3571 3572 /* 3573 * if blocking, then post the semaphore if PFM_CTX_MASKED, i.e. 3574 * the task is blocked or on its way to block. That's the normal 3575 * restart path. If the monitoring is not masked, then the task 3576 * can be actively monitoring and we cannot directly intervene. 3577 * Therefore we use the trap mechanism to catch the task and 3578 * force it to reset the buffer/reset PMDs. 3579 * 3580 * if non-blocking, then we ensure that the task will go into 3581 * pfm_handle_work() before returning to user mode. 3582 * 3583 * We cannot explicitly reset another task; it MUST always 3584 * be done by the task itself. This works for system wide because 3585 * the tool that is controlling the session is logically doing 3586 * "self-monitoring".
3587 */ 3588 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3589 DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3590 complete(&ctx->ctx_restart_done); 3591 } else { 3592 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3593 3594 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3595 3596 PFM_SET_WORK_PENDING(task, 1); 3597 3598 set_notify_resume(task); 3599 3600 /* 3601 * XXX: send reschedule if task runs on another CPU 3602 */ 3603 } 3604 return 0; 3605 } 3606 3607 static int 3608 pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3609 { 3610 unsigned int m = *(unsigned int *)arg; 3611 3612 pfm_sysctl.debug = m == 0 ? 0 : 1; 3613 3614 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3615 3616 if (m == 0) { 3617 memset(pfm_stats, 0, sizeof(pfm_stats)); 3618 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3619 } 3620 return 0; 3621 } 3622 3623 /* 3624 * arg can be NULL and count can be zero for this function 3625 */ 3626 static int 3627 pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3628 { 3629 struct thread_struct *thread = NULL; 3630 struct task_struct *task; 3631 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3632 unsigned long flags; 3633 dbreg_t dbreg; 3634 unsigned int rnum; 3635 int first_time; 3636 int ret = 0, state; 3637 int i, can_access_pmu = 0; 3638 int is_system, is_loaded; 3639 3640 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3641 3642 state = ctx->ctx_state; 3643 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3644 is_system = ctx->ctx_fl_system; 3645 task = ctx->ctx_task; 3646 3647 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3648 3649 /* 3650 * on both UP and SMP, we can only write to the debug registers when the task is 3651 * the owner of the local PMU. 3652 */ 3653 if (is_loaded) { 3654 thread = &task->thread; 3655 /* 3656 * In system wide and when the context is loaded, access can only happen 3657 * when the caller is running on the CPU being monitored by the session. 3658 * It does not have to be the owner (ctx_task) of the context per se. 3659 */ 3660 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3661 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3662 return -EBUSY; 3663 } 3664 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3665 } 3666 3667 /* 3668 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3669 * ensuring that no real breakpoint can be installed via this call. 3670 * 3671 * IMPORTANT: regs can be NULL in this function 3672 */ 3673 3674 first_time = ctx->ctx_fl_using_dbreg == 0; 3675 3676 /* 3677 * don't bother if we are loaded and task is being debugged 3678 */ 3679 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3680 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3681 return -EBUSY; 3682 } 3683 3684 /* 3685 * check for debug registers in system wide mode 3686 * 3687 * Even though a check is done in pfm_context_load(), 3688 * we must repeat it here, in case the registers are 3689 * written after the context is loaded. 3690 */ 3691 if (is_loaded) { 3692 LOCK_PFS(flags); 3693 3694 if (first_time && is_system) { 3695 if (pfm_sessions.pfs_ptrace_use_dbregs) 3696 ret = -EBUSY; 3697 else 3698 pfm_sessions.pfs_sys_use_dbregs++; 3699 } 3700 UNLOCK_PFS(flags); 3701 } 3702 3703 if (ret != 0) return ret; 3704 3705 /* 3706 * mark ourselves as a user of the debug registers for 3707 * perfmon purposes.
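* pfm_use_debug_registers() checks ctx_fl_using_dbreg so that ptrace() cannot hand out the same registers.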
3708 */ 3709 ctx->ctx_fl_using_dbreg = 1; 3710 3711 /* 3712 * clear hardware registers to make sure we don't 3713 * pick up stale state. 3714 * 3715 * for a system wide session, we do not use 3716 * thread.dbr, thread.ibr because this process 3717 * never leaves the current CPU and the state 3718 * is shared by all processes running on it 3719 */ 3720 if (first_time && can_access_pmu) { 3721 DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task))); 3722 for (i=0; i < pmu_conf->num_ibrs; i++) { 3723 ia64_set_ibr(i, 0UL); 3724 ia64_dv_serialize_instruction(); 3725 } 3726 ia64_srlz_i(); 3727 for (i=0; i < pmu_conf->num_dbrs; i++) { 3728 ia64_set_dbr(i, 0UL); 3729 ia64_dv_serialize_data(); 3730 } 3731 ia64_srlz_d(); 3732 } 3733 3734 /* 3735 * Now install the values into the registers 3736 */ 3737 for (i = 0; i < count; i++, req++) { 3738 3739 rnum = req->dbreg_num; 3740 dbreg.val = req->dbreg_value; 3741 3742 ret = -EINVAL; 3743 3744 if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) { 3745 DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n", 3746 rnum, dbreg.val, mode, i, count)); 3747 3748 goto abort_mission; 3749 } 3750 3751 /* 3752 * make sure we do not install enabled breakpoint 3753 */ 3754 if (rnum & 0x1) { 3755 if (mode == PFM_CODE_RR) 3756 dbreg.ibr.ibr_x = 0; 3757 else 3758 dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0; 3759 } 3760 3761 PFM_REG_RETFLAG_SET(req->dbreg_flags, 0); 3762 3763 /* 3764 * Debug registers, just like PMC, can only be modified 3765 * by a kernel call. Moreover, perfmon() access to those 3766 * registers are centralized in this routine. The hardware 3767 * does not modify the value of these registers, therefore, 3768 * if we save them as they are written, we can avoid having 3769 * to save them on context switch out. This is made possible 3770 * by the fact that when perfmon uses debug registers, ptrace() 3771 * won't be able to modify them concurrently. 
3772 */ 3773 if (mode == PFM_CODE_RR) { 3774 CTX_USED_IBR(ctx, rnum); 3775 3776 if (can_access_pmu) { 3777 ia64_set_ibr(rnum, dbreg.val); 3778 ia64_dv_serialize_instruction(); 3779 } 3780 3781 ctx->ctx_ibrs[rnum] = dbreg.val; 3782 3783 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3784 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3785 } else { 3786 CTX_USED_DBR(ctx, rnum); 3787 3788 if (can_access_pmu) { 3789 ia64_set_dbr(rnum, dbreg.val); 3790 ia64_dv_serialize_data(); 3791 } 3792 ctx->ctx_dbrs[rnum] = dbreg.val; 3793 3794 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3795 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3796 } 3797 } 3798 3799 return 0; 3800 3801 abort_mission: 3802 /* 3803 * in case it was our first attempt, we undo the global modifications 3804 */ 3805 if (first_time) { 3806 LOCK_PFS(flags); 3807 if (ctx->ctx_fl_system) { 3808 pfm_sessions.pfs_sys_use_dbregs--; 3809 } 3810 UNLOCK_PFS(flags); 3811 ctx->ctx_fl_using_dbreg = 0; 3812 } 3813 /* 3814 * install error return flag 3815 */ 3816 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3817 3818 return ret; 3819 } 3820 3821 static int 3822 pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3823 { 3824 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3825 } 3826 3827 static int 3828 pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3829 { 3830 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3831 } 3832 3833 int 3834 pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3835 { 3836 pfm_context_t *ctx; 3837 3838 if (req == NULL) return -EINVAL; 3839 3840 ctx = GET_PMU_CTX(); 3841 3842 if (ctx == NULL) return -EINVAL; 3843 3844 /* 3845 * for now limit to current task, which is enough when calling 3846 * from overflow handler 3847 */ 3848 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3849 3850 return pfm_write_ibrs(ctx, req, nreq, regs); 3851 } 3852 EXPORT_SYMBOL(pfm_mod_write_ibrs); 3853 3854 int 3855 pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3856 { 3857 pfm_context_t *ctx; 3858 3859 if (req == NULL) return -EINVAL; 3860 3861 ctx = GET_PMU_CTX(); 3862 3863 if (ctx == NULL) return -EINVAL; 3864 3865 /* 3866 * for now limit to current task, which is enough when calling 3867 * from overflow handler 3868 */ 3869 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3870 3871 return pfm_write_dbrs(ctx, req, nreq, regs); 3872 } 3873 EXPORT_SYMBOL(pfm_mod_write_dbrs); 3874 3875 3876 static int 3877 pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3878 { 3879 pfarg_features_t *req = (pfarg_features_t *)arg; 3880 3881 req->ft_version = PFM_VERSION; 3882 return 0; 3883 } 3884 3885 static int 3886 pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3887 { 3888 struct pt_regs *tregs; 3889 struct task_struct *task = PFM_CTX_TASK(ctx); 3890 int state, is_system; 3891 3892 state = ctx->ctx_state; 3893 is_system = ctx->ctx_fl_system; 3894 3895 /* 3896 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3897 */ 3898 if (state == PFM_CTX_UNLOADED) return -EINVAL; 3899 3900 /* 3901 * In system wide and when the context is loaded, access can only happen 3902 * when the caller is running on the CPU being monitored by the session. 
3903 * It does not have to be the owner (ctx_task) of the context per se. 3904 */ 3905 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3906 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3907 return -EBUSY; 3908 } 3909 DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 3910 task_pid_nr(PFM_CTX_TASK(ctx)), 3911 state, 3912 is_system)); 3913 /* 3914 * in system mode, we need to update the PMU directly 3915 * and the user level state of the caller, which may not 3916 * necessarily be the creator of the context. 3917 */ 3918 if (is_system) { 3919 /* 3920 * Update local PMU first 3921 * 3922 * disable dcr pp 3923 */ 3924 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 3925 ia64_srlz_i(); 3926 3927 /* 3928 * update local cpuinfo 3929 */ 3930 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 3931 3932 /* 3933 * stop monitoring, does srlz.i 3934 */ 3935 pfm_clear_psr_pp(); 3936 3937 /* 3938 * stop monitoring in the caller 3939 */ 3940 ia64_psr(regs)->pp = 0; 3941 3942 return 0; 3943 } 3944 /* 3945 * per-task mode 3946 */ 3947 3948 if (task == current) { 3949 /* stop monitoring at kernel level */ 3950 pfm_clear_psr_up(); 3951 3952 /* 3953 * stop monitoring at the user level 3954 */ 3955 ia64_psr(regs)->up = 0; 3956 } else { 3957 tregs = task_pt_regs(task); 3958 3959 /* 3960 * stop monitoring at the user level 3961 */ 3962 ia64_psr(tregs)->up = 0; 3963 3964 /* 3965 * monitoring disabled in kernel at next reschedule 3966 */ 3967 ctx->ctx_saved_psr_up = 0; 3968 DPRINT(("task=[%d]\n", task_pid_nr(task))); 3969 } 3970 return 0; 3971 } 3972 3973 3974 static int 3975 pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3976 { 3977 struct pt_regs *tregs; 3978 int state, is_system; 3979 3980 state = ctx->ctx_state; 3981 is_system = ctx->ctx_fl_system; 3982 3983 if (state != PFM_CTX_LOADED) return -EINVAL; 3984 3985 /* 3986 * In system wide and when the context is loaded, access can only happen 3987 * when the caller is running on the CPU being monitored by the session. 3988 * It does not have to be the owner (ctx_task) of the context per se. 3989 */ 3990 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3991 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3992 return -EBUSY; 3993 } 3994 3995 /* 3996 * in system mode, we need to update the PMU directly 3997 * and the user level state of the caller, which may not 3998 * necessarily be the creator of the context. 
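 * For a system-wide session this also means the caller must already have
 * pinned itself onto ctx_cpu (enforced by the smp_processor_id() check
 * above). An illustrative user level sequence, assuming the usual
 * perfmonctl() wrapper, the PFM_START command from perfmon.h, and
 * monitored_cpu/ctx_fd chosen by the tool:
 *
 *	cpu_set_t set;
 *	CPU_ZERO(&set);
 *	CPU_SET(monitored_cpu, &set);
 *	sched_setaffinity(0, sizeof(set), &set);
 *	perfmonctl(ctx_fd, PFM_START, NULL, 0);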
3999 */ 4000 if (is_system) { 4001 4002 /* 4003 * set user level psr.pp for the caller 4004 */ 4005 ia64_psr(regs)->pp = 1; 4006 4007 /* 4008 * now update the local PMU and cpuinfo 4009 */ 4010 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); 4011 4012 /* 4013 * start monitoring at kernel level 4014 */ 4015 pfm_set_psr_pp(); 4016 4017 /* enable dcr pp */ 4018 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 4019 ia64_srlz_i(); 4020 4021 return 0; 4022 } 4023 4024 /* 4025 * per-process mode 4026 */ 4027 4028 if (ctx->ctx_task == current) { 4029 4030 /* start monitoring at kernel level */ 4031 pfm_set_psr_up(); 4032 4033 /* 4034 * activate monitoring at user level 4035 */ 4036 ia64_psr(regs)->up = 1; 4037 4038 } else { 4039 tregs = task_pt_regs(ctx->ctx_task); 4040 4041 /* 4042 * start monitoring at the kernel level the next 4043 * time the task is scheduled 4044 */ 4045 ctx->ctx_saved_psr_up = IA64_PSR_UP; 4046 4047 /* 4048 * activate monitoring at user level 4049 */ 4050 ia64_psr(tregs)->up = 1; 4051 } 4052 return 0; 4053 } 4054 4055 static int 4056 pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4057 { 4058 pfarg_reg_t *req = (pfarg_reg_t *)arg; 4059 unsigned int cnum; 4060 int i; 4061 int ret = -EINVAL; 4062 4063 for (i = 0; i < count; i++, req++) { 4064 4065 cnum = req->reg_num; 4066 4067 if (!PMC_IS_IMPL(cnum)) goto abort_mission; 4068 4069 req->reg_value = PMC_DFL_VAL(cnum); 4070 4071 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 4072 4073 DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); 4074 } 4075 return 0; 4076 4077 abort_mission: 4078 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 4079 return ret; 4080 } 4081 4082 static int 4083 pfm_check_task_exist(pfm_context_t *ctx) 4084 { 4085 struct task_struct *g, *t; 4086 int ret = -ESRCH; 4087 4088 read_lock(&tasklist_lock); 4089 4090 do_each_thread (g, t) { 4091 if (t->thread.pfm_context == ctx) { 4092 ret = 0; 4093 goto out; 4094 } 4095 } while_each_thread (g, t); 4096 out: 4097 read_unlock(&tasklist_lock); 4098 4099 DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); 4100 4101 return ret; 4102 } 4103 4104 static int 4105 pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4106 { 4107 struct task_struct *task; 4108 struct thread_struct *thread; 4109 struct pfm_context_t *old; 4110 unsigned long flags; 4111 #ifndef CONFIG_SMP 4112 struct task_struct *owner_task = NULL; 4113 #endif 4114 pfarg_load_t *req = (pfarg_load_t *)arg; 4115 unsigned long *pmcs_source, *pmds_source; 4116 int the_cpu; 4117 int ret = 0; 4118 int state, is_system, set_dbregs = 0; 4119 4120 state = ctx->ctx_state; 4121 is_system = ctx->ctx_fl_system; 4122 /* 4123 * can only load from unloaded or terminated state 4124 */ 4125 if (state != PFM_CTX_UNLOADED) { 4126 DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", 4127 req->load_pid, 4128 ctx->ctx_state)); 4129 return -EBUSY; 4130 } 4131 4132 DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); 4133 4134 if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { 4135 DPRINT(("cannot use blocking mode on self\n")); 4136 return -EINVAL; 4137 } 4138 4139 ret = pfm_get_task(ctx, req->load_pid, &task); 4140 if (ret) { 4141 DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); 4142 return ret; 4143 } 4144 4145 ret = -EINVAL; 4146 4147 /* 4148 * system wide is self monitoring only 4149 */ 4150 if (is_system && task != current) { 4151 DPRINT(("system wide is self 
monitoring only load_pid=%d\n", 4152 req->load_pid)); 4153 goto error; 4154 } 4155 4156 thread = &task->thread; 4157 4158 ret = 0; 4159 /* 4160 * cannot load a context which is using range restrictions, 4161 * into a task that is being debugged. 4162 */ 4163 if (ctx->ctx_fl_using_dbreg) { 4164 if (thread->flags & IA64_THREAD_DBG_VALID) { 4165 ret = -EBUSY; 4166 DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); 4167 goto error; 4168 } 4169 LOCK_PFS(flags); 4170 4171 if (is_system) { 4172 if (pfm_sessions.pfs_ptrace_use_dbregs) { 4173 DPRINT(("cannot load [%d] dbregs in use\n", 4174 task_pid_nr(task))); 4175 ret = -EBUSY; 4176 } else { 4177 pfm_sessions.pfs_sys_use_dbregs++; 4178 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs)); 4179 set_dbregs = 1; 4180 } 4181 } 4182 4183 UNLOCK_PFS(flags); 4184 4185 if (ret) goto error; 4186 } 4187 4188 /* 4189 * SMP system-wide monitoring implies self-monitoring. 4190 * 4191 * The programming model expects the task to 4192 * be pinned on a CPU throughout the session. 4193 * Here we take note of the current CPU at the 4194 * time the context is loaded. No call from 4195 * another CPU will be allowed. 4196 * 4197 * The pinning via sched_setaffinity() 4198 * must be done by the calling task prior 4199 * to this call. 4200 * 4201 * systemwide: keep track of CPU this session is supposed to run on 4202 */ 4203 the_cpu = ctx->ctx_cpu = smp_processor_id(); 4204 4205 ret = -EBUSY; 4206 /* 4207 * now reserve the session 4208 */ 4209 ret = pfm_reserve_session(current, is_system, the_cpu); 4210 if (ret) goto error; 4211 4212 /* 4213 * task is necessarily stopped at this point. 4214 * 4215 * If the previous context was zombie, then it got removed in 4216 * pfm_save_regs(). Therefore we should not see it here.
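 * (the SMP version of pfm_save_regs() further down reclaims zombie
 * contexts when the dying task is switched out, via pfm_force_cleanup()
 * and pfm_context_free())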
4217 * If we see a context, then this is an active context 4218 * 4219 * XXX: needs to be atomic 4220 */ 4221 DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", 4222 thread->pfm_context, ctx)); 4223 4224 ret = -EBUSY; 4225 old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); 4226 if (old != NULL) { 4227 DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); 4228 goto error_unres; 4229 } 4230 4231 pfm_reset_msgq(ctx); 4232 4233 ctx->ctx_state = PFM_CTX_LOADED; 4234 4235 /* 4236 * link context to task 4237 */ 4238 ctx->ctx_task = task; 4239 4240 if (is_system) { 4241 /* 4242 * we load as stopped 4243 */ 4244 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); 4245 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4246 4247 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); 4248 } else { 4249 thread->flags |= IA64_THREAD_PM_VALID; 4250 } 4251 4252 /* 4253 * propagate into thread-state 4254 */ 4255 pfm_copy_pmds(task, ctx); 4256 pfm_copy_pmcs(task, ctx); 4257 4258 pmcs_source = ctx->th_pmcs; 4259 pmds_source = ctx->th_pmds; 4260 4261 /* 4262 * always the case for system-wide 4263 */ 4264 if (task == current) { 4265 4266 if (is_system == 0) { 4267 4268 /* allow user level control */ 4269 ia64_psr(regs)->sp = 0; 4270 DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); 4271 4272 SET_LAST_CPU(ctx, smp_processor_id()); 4273 INC_ACTIVATION(); 4274 SET_ACTIVATION(ctx); 4275 #ifndef CONFIG_SMP 4276 /* 4277 * push the other task out, if any 4278 */ 4279 owner_task = GET_PMU_OWNER(); 4280 if (owner_task) pfm_lazy_save_regs(owner_task); 4281 #endif 4282 } 4283 /* 4284 * load all PMD from ctx to PMU (as opposed to thread state) 4285 * restore all PMC from ctx to PMU 4286 */ 4287 pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); 4288 pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); 4289 4290 ctx->ctx_reload_pmcs[0] = 0UL; 4291 ctx->ctx_reload_pmds[0] = 0UL; 4292 4293 /* 4294 * guaranteed safe by earlier check against DBG_VALID 4295 */ 4296 if (ctx->ctx_fl_using_dbreg) { 4297 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 4298 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 4299 } 4300 /* 4301 * set new ownership 4302 */ 4303 SET_PMU_OWNER(task, ctx); 4304 4305 DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task))); 4306 } else { 4307 /* 4308 * when not current, task MUST be stopped, so this is safe 4309 */ 4310 regs = task_pt_regs(task); 4311 4312 /* force a full reload */ 4313 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4314 SET_LAST_CPU(ctx, -1); 4315 4316 /* initial saved psr (stopped) */ 4317 ctx->ctx_saved_psr_up = 0UL; 4318 ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; 4319 } 4320 4321 ret = 0; 4322 4323 error_unres: 4324 if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); 4325 error: 4326 /* 4327 * we must undo the dbregs setting (for system-wide) 4328 */ 4329 if (ret && set_dbregs) { 4330 LOCK_PFS(flags); 4331 pfm_sessions.pfs_sys_use_dbregs--; 4332 UNLOCK_PFS(flags); 4333 } 4334 /* 4335 * release task, there is now a link with the context 4336 */ 4337 if (is_system == 0 && task != current) { 4338 pfm_put_task(task); 4339 4340 if (ret == 0) { 4341 ret = pfm_check_task_exist(ctx); 4342 if (ret) { 4343 ctx->ctx_state = PFM_CTX_UNLOADED; 4344 ctx->ctx_task = NULL; 4345 } 4346 } 4347 } 4348 return ret; 4349 } 4350 4351 /* 4352 * in this function, we do not need to increase the use count 4353 * for the task via get_task_struct(), because we hold the 4354 * context lock. 
If the task were to disappear while having 4355 * a context attached, it would go through pfm_exit_thread() 4356 * which also grabs the context lock and would therefore be blocked 4357 * until we are here. 4358 */ 4359 static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); 4360 4361 static int 4362 pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4363 { 4364 struct task_struct *task = PFM_CTX_TASK(ctx); 4365 struct pt_regs *tregs; 4366 int prev_state, is_system; 4367 int ret; 4368 4369 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1)); 4370 4371 prev_state = ctx->ctx_state; 4372 is_system = ctx->ctx_fl_system; 4373 4374 /* 4375 * unload only when necessary 4376 */ 4377 if (prev_state == PFM_CTX_UNLOADED) { 4378 DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); 4379 return 0; 4380 } 4381 4382 /* 4383 * clear psr and dcr bits 4384 */ 4385 ret = pfm_stop(ctx, NULL, 0, regs); 4386 if (ret) return ret; 4387 4388 ctx->ctx_state = PFM_CTX_UNLOADED; 4389 4390 /* 4391 * in system mode, we need to update the PMU directly 4392 * and the user level state of the caller, which may not 4393 * necessarily be the creator of the context. 4394 */ 4395 if (is_system) { 4396 4397 /* 4398 * Update cpuinfo 4399 * 4400 * local PMU is taken care of in pfm_stop() 4401 */ 4402 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); 4403 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); 4404 4405 /* 4406 * save PMDs in context 4407 * release ownership 4408 */ 4409 pfm_flush_pmds(current, ctx); 4410 4411 /* 4412 * at this point we are done with the PMU 4413 * so we can unreserve the resource. 4414 */ 4415 if (prev_state != PFM_CTX_ZOMBIE) 4416 pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); 4417 4418 /* 4419 * disconnect context from task 4420 */ 4421 task->thread.pfm_context = NULL; 4422 /* 4423 * disconnect task from context 4424 */ 4425 ctx->ctx_task = NULL; 4426 4427 /* 4428 * There is nothing more to cleanup here. 4429 */ 4430 return 0; 4431 } 4432 4433 /* 4434 * per-task mode 4435 */ 4436 tregs = task == current ? regs : task_pt_regs(task); 4437 4438 if (task == current) { 4439 /* 4440 * cancel user level control 4441 */ 4442 ia64_psr(regs)->sp = 1; 4443 4444 DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); 4445 } 4446 /* 4447 * save PMDs to context 4448 * release ownership 4449 */ 4450 pfm_flush_pmds(task, ctx); 4451 4452 /* 4453 * at this point we are done with the PMU 4454 * so we can unreserve the resource. 4455 * 4456 * when state was ZOMBIE, we have already unreserved. 
4457 */ 4458 if (prev_state != PFM_CTX_ZOMBIE) 4459 pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); 4460 4461 /* 4462 * reset activation counter and psr 4463 */ 4464 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4465 SET_LAST_CPU(ctx, -1); 4466 4467 /* 4468 * PMU state will not be restored 4469 */ 4470 task->thread.flags &= ~IA64_THREAD_PM_VALID; 4471 4472 /* 4473 * break links between context and task 4474 */ 4475 task->thread.pfm_context = NULL; 4476 ctx->ctx_task = NULL; 4477 4478 PFM_SET_WORK_PENDING(task, 0); 4479 4480 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4481 ctx->ctx_fl_can_restart = 0; 4482 ctx->ctx_fl_going_zombie = 0; 4483 4484 DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); 4485 4486 return 0; 4487 } 4488 4489 4490 /* 4491 * called only from exit_thread() 4492 * we come here only if the task has a context attached (loaded or masked) 4493 */ 4494 void 4495 pfm_exit_thread(struct task_struct *task) 4496 { 4497 pfm_context_t *ctx; 4498 unsigned long flags; 4499 struct pt_regs *regs = task_pt_regs(task); 4500 int ret, state; 4501 int free_ok = 0; 4502 4503 ctx = PFM_GET_CTX(task); 4504 4505 PROTECT_CTX(ctx, flags); 4506 4507 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); 4508 4509 state = ctx->ctx_state; 4510 switch(state) { 4511 case PFM_CTX_UNLOADED: 4512 /* 4513 * only comes to this function if pfm_context is not NULL, i.e., cannot 4514 * be in unloaded state 4515 */ 4516 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); 4517 break; 4518 case PFM_CTX_LOADED: 4519 case PFM_CTX_MASKED: 4520 ret = pfm_context_unload(ctx, NULL, 0, regs); 4521 if (ret) { 4522 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4523 } 4524 DPRINT(("ctx unloaded for current state was %d\n", state)); 4525 4526 pfm_end_notify_user(ctx); 4527 break; 4528 case PFM_CTX_ZOMBIE: 4529 ret = pfm_context_unload(ctx, NULL, 0, regs); 4530 if (ret) { 4531 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4532 } 4533 free_ok = 1; 4534 break; 4535 default: 4536 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state); 4537 break; 4538 } 4539 UNPROTECT_CTX(ctx, flags); 4540 4541 { u64 psr = pfm_get_psr(); 4542 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 4543 BUG_ON(GET_PMU_OWNER()); 4544 BUG_ON(ia64_psr(regs)->up); 4545 BUG_ON(ia64_psr(regs)->pp); 4546 } 4547 4548 /* 4549 * All memory free operations (especially for vmalloc'ed memory) 4550 * MUST be done with interrupts ENABLED. 
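 * (vfree() may sleep and may issue TLB flush IPIs, neither of which is
 * safe with interrupts masked, hence the UNPROTECT_CTX() above must come
 * first)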
4551 */ 4552 if (free_ok) pfm_context_free(ctx); 4553 } 4554 4555 /* 4556 * functions MUST be listed in the increasing order of their index (see permfon.h) 4557 */ 4558 #define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } 4559 #define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } 4560 #define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) 4561 #define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) 4562 #define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} 4563 4564 static pfm_cmd_desc_t pfm_cmd_tab[]={ 4565 /* 0 */PFM_CMD_NONE, 4566 /* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4567 /* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4568 /* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4569 /* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), 4570 /* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), 4571 /* 6 */PFM_CMD_NONE, 4572 /* 7 */PFM_CMD_NONE, 4573 /* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), 4574 /* 9 */PFM_CMD_NONE, 4575 /* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), 4576 /* 11 */PFM_CMD_NONE, 4577 /* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), 4578 /* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), 4579 /* 14 */PFM_CMD_NONE, 4580 /* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4581 /* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), 4582 /* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), 4583 /* 18 */PFM_CMD_NONE, 4584 /* 19 */PFM_CMD_NONE, 4585 /* 20 */PFM_CMD_NONE, 4586 /* 21 */PFM_CMD_NONE, 4587 /* 22 */PFM_CMD_NONE, 4588 /* 23 */PFM_CMD_NONE, 4589 /* 24 */PFM_CMD_NONE, 4590 /* 25 */PFM_CMD_NONE, 4591 /* 26 */PFM_CMD_NONE, 4592 /* 27 */PFM_CMD_NONE, 4593 /* 28 */PFM_CMD_NONE, 4594 /* 29 */PFM_CMD_NONE, 4595 /* 30 */PFM_CMD_NONE, 4596 /* 31 */PFM_CMD_NONE, 4597 /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), 4598 /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) 4599 }; 4600 #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) 4601 4602 static int 4603 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) 4604 { 4605 struct task_struct *task; 4606 int state, old_state; 4607 4608 recheck: 4609 state = ctx->ctx_state; 4610 task = ctx->ctx_task; 4611 4612 if (task == NULL) { 4613 DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); 4614 return 0; 4615 } 4616 4617 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", 4618 ctx->ctx_fd, 4619 state, 4620 task_pid_nr(task), 4621 task->state, PFM_CMD_STOPPED(cmd))); 4622 4623 /* 4624 * self-monitoring always ok. 4625 * 4626 * for system-wide the caller can either be the creator of the 4627 * context (to one to which the context is attached to) OR 4628 * a task running on the same CPU as the session. 
4629 */ 4630 if (task == current || ctx->ctx_fl_system) return 0; 4631 4632 /* 4633 * we are monitoring another thread 4634 */ 4635 switch(state) { 4636 case PFM_CTX_UNLOADED: 4637 /* 4638 * if context is UNLOADED we are safe to go 4639 */ 4640 return 0; 4641 case PFM_CTX_ZOMBIE: 4642 /* 4643 * no command can operate on a zombie context 4644 */ 4645 DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); 4646 return -EINVAL; 4647 case PFM_CTX_MASKED: 4648 /* 4649 * PMU state has been saved to software even though 4650 * the thread may still be running. 4651 */ 4652 if (cmd != PFM_UNLOAD_CONTEXT) return 0; 4653 } 4654 4655 /* 4656 * context is LOADED or MASKED. Some commands may need to have 4657 * the task stopped. 4658 * 4659 * We could lift this restriction for UP but it would mean that 4660 * the user has no guarantee the task would not run between 4661 * two successive calls to perfmonctl(). That's probably OK. 4662 * If this user wants to ensure the task does not run, then 4663 * the task must be stopped. 4664 */ 4665 if (PFM_CMD_STOPPED(cmd)) { 4666 if (!task_is_stopped_or_traced(task)) { 4667 DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); 4668 return -EBUSY; 4669 } 4670 /* 4671 * task is now stopped, wait for ctxsw out 4672 * 4673 * This is an interesting point in the code. 4674 * We need to unprotect the context because 4675 * the pfm_save_regs() routines needs to grab 4676 * the same lock. There are danger in doing 4677 * this because it leaves a window open for 4678 * another task to get access to the context 4679 * and possibly change its state. The one thing 4680 * that is not possible is for the context to disappear 4681 * because we are protected by the VFS layer, i.e., 4682 * get_fd()/put_fd(). 4683 */ 4684 old_state = state; 4685 4686 UNPROTECT_CTX(ctx, flags); 4687 4688 wait_task_inactive(task, 0); 4689 4690 PROTECT_CTX(ctx, flags); 4691 4692 /* 4693 * we must recheck to verify if state has changed 4694 */ 4695 if (ctx->ctx_state != old_state) { 4696 DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); 4697 goto recheck; 4698 } 4699 } 4700 return 0; 4701 } 4702 4703 /* 4704 * system-call entry point (must return long) 4705 */ 4706 asmlinkage long 4707 sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4708 { 4709 struct fd f = {NULL, 0}; 4710 pfm_context_t *ctx = NULL; 4711 unsigned long flags = 0UL; 4712 void *args_k = NULL; 4713 long ret; /* will expand int return types */ 4714 size_t base_sz, sz, xtra_sz = 0; 4715 int narg, completed_args = 0, call_made = 0, cmd_flags; 4716 int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 4717 int (*getsize)(void *arg, size_t *sz); 4718 #define PFM_MAX_ARGSIZE 4096 4719 4720 /* 4721 * reject any call if perfmon was disabled at initialization 4722 */ 4723 if (unlikely(pmu_conf == NULL)) return -ENOSYS; 4724 4725 if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { 4726 DPRINT(("invalid cmd=%d\n", cmd)); 4727 return -EINVAL; 4728 } 4729 4730 func = pfm_cmd_tab[cmd].cmd_func; 4731 narg = pfm_cmd_tab[cmd].cmd_narg; 4732 base_sz = pfm_cmd_tab[cmd].cmd_argsize; 4733 getsize = pfm_cmd_tab[cmd].cmd_getsize; 4734 cmd_flags = pfm_cmd_tab[cmd].cmd_flags; 4735 4736 if (unlikely(func == NULL)) { 4737 DPRINT(("invalid cmd=%d\n", cmd)); 4738 return -EINVAL; 4739 } 4740 4741 DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", 4742 PFM_CMD_NAME(cmd), 4743 cmd, 4744 narg, 4745 base_sz, 4746 count)); 4747 4748 /* 4749 * check if number of arguments matches what the command 
expects 4750 */ 4751 if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) 4752 return -EINVAL; 4753 4754 restart_args: 4755 sz = xtra_sz + base_sz*count; 4756 /* 4757 * limit abuse to min page size 4758 */ 4759 if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4760 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); 4761 return -E2BIG; 4762 } 4763 4764 /* 4765 * allocate default-sized argument buffer 4766 */ 4767 if (likely(count && args_k == NULL)) { 4768 args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); 4769 if (args_k == NULL) return -ENOMEM; 4770 } 4771 4772 ret = -EFAULT; 4773 4774 /* 4775 * copy arguments 4776 * 4777 * assume sz = 0 for command without parameters 4778 */ 4779 if (sz && copy_from_user(args_k, arg, sz)) { 4780 DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); 4781 goto error_args; 4782 } 4783 4784 /* 4785 * check if command supports extra parameters 4786 */ 4787 if (completed_args == 0 && getsize) { 4788 /* 4789 * get extra parameters size (based on main argument) 4790 */ 4791 ret = (*getsize)(args_k, &xtra_sz); 4792 if (ret) goto error_args; 4793 4794 completed_args = 1; 4795 4796 DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); 4797 4798 /* retry if necessary */ 4799 if (likely(xtra_sz)) goto restart_args; 4800 } 4801 4802 if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; 4803 4804 ret = -EBADF; 4805 4806 f = fdget(fd); 4807 if (unlikely(f.file == NULL)) { 4808 DPRINT(("invalid fd %d\n", fd)); 4809 goto error_args; 4810 } 4811 if (unlikely(PFM_IS_FILE(f.file) == 0)) { 4812 DPRINT(("fd %d not related to perfmon\n", fd)); 4813 goto error_args; 4814 } 4815 4816 ctx = f.file->private_data; 4817 if (unlikely(ctx == NULL)) { 4818 DPRINT(("no context for fd %d\n", fd)); 4819 goto error_args; 4820 } 4821 prefetch(&ctx->ctx_state); 4822 4823 PROTECT_CTX(ctx, flags); 4824 4825 /* 4826 * check task is stopped 4827 */ 4828 ret = pfm_check_task_state(ctx, cmd, flags); 4829 if (unlikely(ret)) goto abort_locked; 4830 4831 skip_fd: 4832 ret = (*func)(ctx, args_k, count, task_pt_regs(current)); 4833 4834 call_made = 1; 4835 4836 abort_locked: 4837 if (likely(ctx)) { 4838 DPRINT(("context unlocked\n")); 4839 UNPROTECT_CTX(ctx, flags); 4840 } 4841 4842 /* copy argument back to user, if needed */ 4843 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4844 4845 error_args: 4846 if (f.file) 4847 fdput(f); 4848 4849 kfree(args_k); 4850 4851 DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); 4852 4853 return ret; 4854 } 4855 4856 static void 4857 pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) 4858 { 4859 pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; 4860 pfm_ovfl_ctrl_t rst_ctrl; 4861 int state; 4862 int ret = 0; 4863 4864 state = ctx->ctx_state; 4865 /* 4866 * Unlock sampling buffer and reset index atomically 4867 * XXX: not really needed when blocking 4868 */ 4869 if (CTX_HAS_SMPL(ctx)) { 4870 4871 rst_ctrl.bits.mask_monitoring = 0; 4872 rst_ctrl.bits.reset_ovfl_pmds = 0; 4873 4874 if (state == PFM_CTX_LOADED) 4875 ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4876 else 4877 ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4878 } else { 4879 rst_ctrl.bits.mask_monitoring = 0; 4880 rst_ctrl.bits.reset_ovfl_pmds = 1; 4881 } 4882 4883 if (ret == 0) { 4884 if (rst_ctrl.bits.reset_ovfl_pmds) { 4885 pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); 4886 } 4887 
if (rst_ctrl.bits.mask_monitoring == 0) { 4888 DPRINT(("resuming monitoring\n")); 4889 if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); 4890 } else { 4891 DPRINT(("stopping monitoring\n")); 4892 //pfm_stop_monitoring(current, regs); 4893 } 4894 ctx->ctx_state = PFM_CTX_LOADED; 4895 } 4896 } 4897 4898 /* 4899 * context MUST BE LOCKED when calling 4900 * can only be called for current 4901 */ 4902 static void 4903 pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) 4904 { 4905 int ret; 4906 4907 DPRINT(("entering for [%d]\n", task_pid_nr(current))); 4908 4909 ret = pfm_context_unload(ctx, NULL, 0, regs); 4910 if (ret) { 4911 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); 4912 } 4913 4914 /* 4915 * and wakeup controlling task, indicating we are now disconnected 4916 */ 4917 wake_up_interruptible(&ctx->ctx_zombieq); 4918 4919 /* 4920 * given that context is still locked, the controlling 4921 * task will only get access when we return from 4922 * pfm_handle_work(). 4923 */ 4924 } 4925 4926 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); 4927 4928 /* 4929 * pfm_handle_work() can be called with interrupts enabled 4930 * (TIF_NEED_RESCHED) or disabled. The down_interruptible 4931 * call may sleep, therefore we must re-enable interrupts 4932 * to avoid deadlocks. It is safe to do so because this function 4933 * is called ONLY when returning to user level (pUStk=1), in which case 4934 * there is no risk of kernel stack overflow due to deep 4935 * interrupt nesting. 4936 */ 4937 void 4938 pfm_handle_work(void) 4939 { 4940 pfm_context_t *ctx; 4941 struct pt_regs *regs; 4942 unsigned long flags, dummy_flags; 4943 unsigned long ovfl_regs; 4944 unsigned int reason; 4945 int ret; 4946 4947 ctx = PFM_GET_CTX(current); 4948 if (ctx == NULL) { 4949 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", 4950 task_pid_nr(current)); 4951 return; 4952 } 4953 4954 PROTECT_CTX(ctx, flags); 4955 4956 PFM_SET_WORK_PENDING(current, 0); 4957 4958 regs = task_pt_regs(current); 4959 4960 /* 4961 * extract reason for being here and clear 4962 */ 4963 reason = ctx->ctx_fl_trap_reason; 4964 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4965 ovfl_regs = ctx->ctx_ovfl_regs[0]; 4966 4967 DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); 4968 4969 /* 4970 * must be done before we check for simple-reset mode 4971 */ 4972 if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) 4973 goto do_zombie; 4974 4975 //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; 4976 if (reason == PFM_TRAP_REASON_RESET) 4977 goto skip_blocking; 4978 4979 /* 4980 * restore interrupt mask to what it was on entry. 4981 * Could be enabled/diasbled. 4982 */ 4983 UNPROTECT_CTX(ctx, flags); 4984 4985 /* 4986 * force interrupt enable because of down_interruptible() 4987 */ 4988 local_irq_enable(); 4989 4990 DPRINT(("before block sleeping\n")); 4991 4992 /* 4993 * may go through without blocking on SMP systems 4994 * if restart has been received already by the time we call down() 4995 */ 4996 ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); 4997 4998 DPRINT(("after block sleeping ret=%d\n", ret)); 4999 5000 /* 5001 * lock context and mask interrupts again 5002 * We save flags into a dummy because we may have 5003 * altered interrupts mask compared to entry in this 5004 * function. 
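 * We are typically woken up here because the controlling task read the
 * overflow message from the context file descriptor and issued
 * PFM_RESTART, which completes ctx_restart_done. An illustrative
 * controlling-task fragment, assuming the perfmonctl() wrapper and the
 * pfm_msg_t layout from perfmon.h (ctx_fd is an example name):
 *
 *	pfm_msg_t msg;
 *	if (read(ctx_fd, &msg, sizeof(msg)) == sizeof(msg)
 *	    && msg.pfm_ovfl_msg.msg_type == PFM_MSG_OVFL)
 *		perfmonctl(ctx_fd, PFM_RESTART, NULL, 0);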
5005 */ 5006 PROTECT_CTX(ctx, dummy_flags); 5007 5008 /* 5009 * we need to read the ovfl_regs only after wake-up 5010 * because we may have had pfm_write_pmds() in between 5011 * and that can changed PMD values and therefore 5012 * ovfl_regs is reset for these new PMD values. 5013 */ 5014 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5015 5016 if (ctx->ctx_fl_going_zombie) { 5017 do_zombie: 5018 DPRINT(("context is zombie, bailing out\n")); 5019 pfm_context_force_terminate(ctx, regs); 5020 goto nothing_to_do; 5021 } 5022 /* 5023 * in case of interruption of down() we don't restart anything 5024 */ 5025 if (ret < 0) 5026 goto nothing_to_do; 5027 5028 skip_blocking: 5029 pfm_resume_after_ovfl(ctx, ovfl_regs, regs); 5030 ctx->ctx_ovfl_regs[0] = 0UL; 5031 5032 nothing_to_do: 5033 /* 5034 * restore flags as they were upon entry 5035 */ 5036 UNPROTECT_CTX(ctx, flags); 5037 } 5038 5039 static int 5040 pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) 5041 { 5042 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5043 DPRINT(("ignoring overflow notification, owner is zombie\n")); 5044 return 0; 5045 } 5046 5047 DPRINT(("waking up somebody\n")); 5048 5049 if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); 5050 5051 /* 5052 * safe, we are not in intr handler, nor in ctxsw when 5053 * we come here 5054 */ 5055 kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); 5056 5057 return 0; 5058 } 5059 5060 static int 5061 pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) 5062 { 5063 pfm_msg_t *msg = NULL; 5064 5065 if (ctx->ctx_fl_no_msg == 0) { 5066 msg = pfm_get_new_msg(ctx); 5067 if (msg == NULL) { 5068 printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); 5069 return -1; 5070 } 5071 5072 msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; 5073 msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; 5074 msg->pfm_ovfl_msg.msg_active_set = 0; 5075 msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; 5076 msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; 5077 msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; 5078 msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; 5079 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5080 } 5081 5082 DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", 5083 msg, 5084 ctx->ctx_fl_no_msg, 5085 ctx->ctx_fd, 5086 ovfl_pmds)); 5087 5088 return pfm_notify_user(ctx, msg); 5089 } 5090 5091 static int 5092 pfm_end_notify_user(pfm_context_t *ctx) 5093 { 5094 pfm_msg_t *msg; 5095 5096 msg = pfm_get_new_msg(ctx); 5097 if (msg == NULL) { 5098 printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); 5099 return -1; 5100 } 5101 /* no leak */ 5102 memset(msg, 0, sizeof(*msg)); 5103 5104 msg->pfm_end_msg.msg_type = PFM_MSG_END; 5105 msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; 5106 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5107 5108 DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", 5109 msg, 5110 ctx->ctx_fl_no_msg, 5111 ctx->ctx_fd)); 5112 5113 return pfm_notify_user(ctx, msg); 5114 } 5115 5116 /* 5117 * main overflow processing routine. 
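 * In short: update the 64-bit software counters for each overflowed PMD,
 * hand the overflow to the sampling format handler if one is attached,
 * reset the requested PMDs with their short reset values, then optionally
 * mask monitoring and/or queue a notification for the user.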
5118 * it can be called from the interrupt path or explicitly during the context switch code 5119 */ 5120 static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, 5121 unsigned long pmc0, struct pt_regs *regs) 5122 { 5123 pfm_ovfl_arg_t *ovfl_arg; 5124 unsigned long mask; 5125 unsigned long old_val, ovfl_val, new_val; 5126 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; 5127 unsigned long tstamp; 5128 pfm_ovfl_ctrl_t ovfl_ctrl; 5129 unsigned int i, has_smpl; 5130 int must_notify = 0; 5131 5132 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; 5133 5134 /* 5135 * sanity test. Should never happen 5136 */ 5137 if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; 5138 5139 tstamp = ia64_get_itc(); 5140 mask = pmc0 >> PMU_FIRST_COUNTER; 5141 ovfl_val = pmu_conf->ovfl_val; 5142 has_smpl = CTX_HAS_SMPL(ctx); 5143 5144 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5145 "used_pmds=0x%lx\n", 5146 pmc0, 5147 task ? task_pid_nr(task): -1, 5148 (regs ? regs->cr_iip : 0), 5149 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5150 ctx->ctx_used_pmds[0])); 5151 5152 5153 /* 5154 * first we update the virtual counters 5155 * assume there was a prior ia64_srlz_d() issued 5156 */ 5157 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { 5158 5159 /* skip pmd which did not overflow */ 5160 if ((mask & 0x1) == 0) continue; 5161 5162 /* 5163 * Note that the pmd is not necessarily 0 at this point as qualified events 5164 * may have happened before the PMU was frozen. The residual count is not 5165 * taken into consideration here but will be with any read of the pmd via 5166 * pfm_read_pmds(). 5167 */ 5168 old_val = new_val = ctx->ctx_pmds[i].val; 5169 new_val += 1 + ovfl_val; 5170 ctx->ctx_pmds[i].val = new_val; 5171 5172 /* 5173 * check for overflow condition 5174 */ 5175 if (likely(old_val > new_val)) { 5176 ovfl_pmds |= 1UL << i; 5177 if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; 5178 } 5179 5180 DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 5181 i, 5182 new_val, 5183 old_val, 5184 ia64_get_pmd(i) & ovfl_val, 5185 ovfl_pmds, 5186 ovfl_notify)); 5187 } 5188 5189 /* 5190 * there was no 64-bit overflow, nothing else to do 5191 */ 5192 if (ovfl_pmds == 0UL) return; 5193 5194 /* 5195 * reset all control bits 5196 */ 5197 ovfl_ctrl.val = 0; 5198 reset_pmds = 0UL; 5199 5200 /* 5201 * if a sampling format module exists, then we "cache" the overflow by 5202 * calling the module's handler() routine. 5203 */ 5204 if (has_smpl) { 5205 unsigned long start_cycles, end_cycles; 5206 unsigned long pmd_mask; 5207 int j, k, ret = 0; 5208 int this_cpu = smp_processor_id(); 5209 5210 pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; 5211 ovfl_arg = &ctx->ctx_ovfl_arg; 5212 5213 prefetch(ctx->ctx_smpl_hdr); 5214 5215 for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { 5216 5217 mask = 1UL << i; 5218 5219 if ((pmd_mask & 0x1) == 0) continue; 5220 5221 ovfl_arg->ovfl_pmd = (unsigned char )i; 5222 ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; 5223 ovfl_arg->active_set = 0; 5224 ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ 5225 ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; 5226 5227 ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; 5228 ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; 5229 ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; 5230 5231 /* 5232 * copy values of pmds of interest. 
Sampling format may copy them 5233 * into sampling buffer. 5234 */ 5235 if (smpl_pmds) { 5236 for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { 5237 if ((smpl_pmds & 0x1) == 0) continue; 5238 ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); 5239 DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); 5240 } 5241 } 5242 5243 pfm_stats[this_cpu].pfm_smpl_handler_calls++; 5244 5245 start_cycles = ia64_get_itc(); 5246 5247 /* 5248 * call custom buffer format record (handler) routine 5249 */ 5250 ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); 5251 5252 end_cycles = ia64_get_itc(); 5253 5254 /* 5255 * For those controls, we take the union because they have 5256 * an all or nothing behavior. 5257 */ 5258 ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; 5259 ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; 5260 ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; 5261 /* 5262 * build the bitmask of pmds to reset now 5263 */ 5264 if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; 5265 5266 pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; 5267 } 5268 /* 5269 * when the module cannot handle the rest of the overflows, we abort right here 5270 */ 5271 if (ret && pmd_mask) { 5272 DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", 5273 pmd_mask<<PMU_FIRST_COUNTER)); 5274 } 5275 /* 5276 * remove the pmds we reset now from the set of pmds to reset in pfm_restart() 5277 */ 5278 ovfl_pmds &= ~reset_pmds; 5279 } else { 5280 /* 5281 * when no sampling module is used, then the default 5282 * is to notify on overflow if requested by user 5283 */ 5284 ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; 5285 ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; 5286 ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ 5287 ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; 5288 /* 5289 * if needed, we reset all overflowed pmds 5290 */ 5291 if (ovfl_notify == 0) reset_pmds = ovfl_pmds; 5292 } 5293 5294 DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); 5295 5296 /* 5297 * reset the requested PMD registers using the short reset values 5298 */ 5299 if (reset_pmds) { 5300 unsigned long bm = reset_pmds; 5301 pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); 5302 } 5303 5304 if (ovfl_notify && ovfl_ctrl.bits.notify_user) { 5305 /* 5306 * keep track of what to reset when unblocking 5307 */ 5308 ctx->ctx_ovfl_regs[0] = ovfl_pmds; 5309 5310 /* 5311 * check for blocking context 5312 */ 5313 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { 5314 5315 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; 5316 5317 /* 5318 * set the perfmon specific checking pending work for the task 5319 */ 5320 PFM_SET_WORK_PENDING(task, 1); 5321 5322 /* 5323 * when coming from ctxsw, current still points to the 5324 * previous task, therefore we must work with task and not current. 5325 */ 5326 set_notify_resume(task); 5327 } 5328 /* 5329 * defer until state is changed (shorten spin window). the context is locked 5330 * anyway, so the signal receiver would come spin for nothing. 5331 */ 5332 must_notify = 1; 5333 } 5334 5335 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5336 GET_PMU_OWNER() ? 
task_pid_nr(GET_PMU_OWNER()) : -1, 5337 PFM_GET_WORK_PENDING(task), 5338 ctx->ctx_fl_trap_reason, 5339 ovfl_pmds, 5340 ovfl_notify, 5341 ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); 5342 /* 5343 * in case monitoring must be stopped, we toggle the psr bits 5344 */ 5345 if (ovfl_ctrl.bits.mask_monitoring) { 5346 pfm_mask_monitoring(task); 5347 ctx->ctx_state = PFM_CTX_MASKED; 5348 ctx->ctx_fl_can_restart = 1; 5349 } 5350 5351 /* 5352 * send notification now 5353 */ 5354 if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); 5355 5356 return; 5357 5358 sanity_check: 5359 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", 5360 smp_processor_id(), 5361 task ? task_pid_nr(task) : -1, 5362 pmc0); 5363 return; 5364 5365 stop_monitoring: 5366 /* 5367 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). 5368 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can 5369 * come here as zombie only if the task is the current task, in which case we 5370 * can access the PMU hardware directly. 5371 * 5372 * Note that zombies do have PM_VALID set. So here we do the minimal. 5373 * 5374 * The context was zombified because it could not be reclaimed at the time 5375 * the monitoring program exited. At this point, the PMU reservation has been 5376 * returned, the sampling buffer has been freed. We must convert this call 5377 * into a spurious interrupt. However, we must also avoid infinite overflows 5378 * by stopping monitoring for this task. We can only come here for a per-task 5379 * context. All we need to do is to stop monitoring using the psr bits which 5380 * are always task private. By re-enabling secure monitoring, we ensure that 5381 * the monitored task will not be able to re-activate monitoring. 5382 * The task will eventually be context switched out, at which point the context 5383 * will be reclaimed (that includes releasing ownership of the PMU). 5384 * 5385 * So there might be a window of time where the number of per-task sessions is zero 5386 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie 5387 * context. This is safe because if a per-task session comes in, it will push this one 5388 * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide 5389 * session is forced onto that CPU, given that we use task pinning, pfm_save_regs() will 5390 * also push our zombie context out. 5391 * 5392 * Overall pretty hairy stuff.... 5393 */ 5394 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ?
task_pid_nr(task): -1)); 5395 pfm_clear_psr_up(); 5396 ia64_psr(regs)->up = 0; 5397 ia64_psr(regs)->sp = 1; 5398 return; 5399 } 5400 5401 static int 5402 pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) 5403 { 5404 struct task_struct *task; 5405 pfm_context_t *ctx; 5406 unsigned long flags; 5407 u64 pmc0; 5408 int this_cpu = smp_processor_id(); 5409 int retval = 0; 5410 5411 pfm_stats[this_cpu].pfm_ovfl_intr_count++; 5412 5413 /* 5414 * srlz.d done before arriving here 5415 */ 5416 pmc0 = ia64_get_pmc(0); 5417 5418 task = GET_PMU_OWNER(); 5419 ctx = GET_PMU_CTX(); 5420 5421 /* 5422 * if we have some pending bits set 5423 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 5424 */ 5425 if (PMC0_HAS_OVFL(pmc0) && task) { 5426 /* 5427 * we assume that pmc0.fr is always set here 5428 */ 5429 5430 /* sanity check */ 5431 if (!ctx) goto report_spurious1; 5432 5433 if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 5434 goto report_spurious2; 5435 5436 PROTECT_CTX_NOPRINT(ctx, flags); 5437 5438 pfm_overflow_handler(task, ctx, pmc0, regs); 5439 5440 UNPROTECT_CTX_NOPRINT(ctx, flags); 5441 5442 } else { 5443 pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; 5444 retval = -1; 5445 } 5446 /* 5447 * keep it unfrozen at all times 5448 */ 5449 pfm_unfreeze_pmu(); 5450 5451 return retval; 5452 5453 report_spurious1: 5454 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5455 this_cpu, task_pid_nr(task)); 5456 pfm_unfreeze_pmu(); 5457 return -1; 5458 report_spurious2: 5459 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5460 this_cpu, 5461 task_pid_nr(task)); 5462 pfm_unfreeze_pmu(); 5463 return -1; 5464 } 5465 5466 static irqreturn_t 5467 pfm_interrupt_handler(int irq, void *arg) 5468 { 5469 unsigned long start_cycles, total_cycles; 5470 unsigned long min, max; 5471 int this_cpu; 5472 int ret; 5473 struct pt_regs *regs = get_irq_regs(); 5474 5475 this_cpu = get_cpu(); 5476 if (likely(!pfm_alt_intr_handler)) { 5477 min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; 5478 max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; 5479 5480 start_cycles = ia64_get_itc(); 5481 5482 ret = pfm_do_interrupt_handler(arg, regs); 5483 5484 total_cycles = ia64_get_itc(); 5485 5486 /* 5487 * don't measure spurious interrupts 5488 */ 5489 if (likely(ret == 0)) { 5490 total_cycles -= start_cycles; 5491 5492 if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; 5493 if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; 5494 5495 pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; 5496 } 5497 } 5498 else { 5499 (*pfm_alt_intr_handler->handler)(irq, arg, regs); 5500 } 5501 5502 put_cpu(); 5503 return IRQ_HANDLED; 5504 } 5505 5506 /* 5507 * /proc/perfmon interface, for debug only 5508 */ 5509 5510 #define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) 5511 5512 static void * 5513 pfm_proc_start(struct seq_file *m, loff_t *pos) 5514 { 5515 if (*pos == 0) { 5516 return PFM_PROC_SHOW_HEADER; 5517 } 5518 5519 while (*pos <= nr_cpu_ids) { 5520 if (cpu_online(*pos - 1)) { 5521 return (void *)*pos; 5522 } 5523 ++*pos; 5524 } 5525 return NULL; 5526 } 5527 5528 static void * 5529 pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) 5530 { 5531 ++*pos; 5532 return pfm_proc_start(m, pos); 5533 } 5534 5535 static void 5536 pfm_proc_stop(struct seq_file *m, void *v) 5537 { 5538 } 5539 5540 static void 5541 
pfm_proc_show_header(struct seq_file *m) 5542 { 5543 struct list_head * pos; 5544 pfm_buffer_fmt_t * entry; 5545 unsigned long flags; 5546 5547 seq_printf(m, 5548 "perfmon version : %u.%u\n" 5549 "model : %s\n" 5550 "fastctxsw : %s\n" 5551 "expert mode : %s\n" 5552 "ovfl_mask : 0x%lx\n" 5553 "PMU flags : 0x%x\n", 5554 PFM_VERSION_MAJ, PFM_VERSION_MIN, 5555 pmu_conf->pmu_name, 5556 pfm_sysctl.fastctxsw > 0 ? "Yes": "No", 5557 pfm_sysctl.expert_mode > 0 ? "Yes": "No", 5558 pmu_conf->ovfl_val, 5559 pmu_conf->flags); 5560 5561 LOCK_PFS(flags); 5562 5563 seq_printf(m, 5564 "proc_sessions : %u\n" 5565 "sys_sessions : %u\n" 5566 "sys_use_dbregs : %u\n" 5567 "ptrace_use_dbregs : %u\n", 5568 pfm_sessions.pfs_task_sessions, 5569 pfm_sessions.pfs_sys_sessions, 5570 pfm_sessions.pfs_sys_use_dbregs, 5571 pfm_sessions.pfs_ptrace_use_dbregs); 5572 5573 UNLOCK_PFS(flags); 5574 5575 spin_lock(&pfm_buffer_fmt_lock); 5576 5577 list_for_each(pos, &pfm_buffer_fmt_list) { 5578 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 5579 seq_printf(m, "format : %16phD %s\n", 5580 entry->fmt_uuid, entry->fmt_name); 5581 } 5582 spin_unlock(&pfm_buffer_fmt_lock); 5583 5584 } 5585 5586 static int 5587 pfm_proc_show(struct seq_file *m, void *v) 5588 { 5589 unsigned long psr; 5590 unsigned int i; 5591 int cpu; 5592 5593 if (v == PFM_PROC_SHOW_HEADER) { 5594 pfm_proc_show_header(m); 5595 return 0; 5596 } 5597 5598 /* show info for CPU (v - 1) */ 5599 5600 cpu = (long)v - 1; 5601 seq_printf(m, 5602 "CPU%-2d overflow intrs : %lu\n" 5603 "CPU%-2d overflow cycles : %lu\n" 5604 "CPU%-2d overflow min : %lu\n" 5605 "CPU%-2d overflow max : %lu\n" 5606 "CPU%-2d smpl handler calls : %lu\n" 5607 "CPU%-2d smpl handler cycles : %lu\n" 5608 "CPU%-2d spurious intrs : %lu\n" 5609 "CPU%-2d replay intrs : %lu\n" 5610 "CPU%-2d syst_wide : %d\n" 5611 "CPU%-2d dcr_pp : %d\n" 5612 "CPU%-2d exclude idle : %d\n" 5613 "CPU%-2d owner : %d\n" 5614 "CPU%-2d context : %p\n" 5615 "CPU%-2d activations : %lu\n", 5616 cpu, pfm_stats[cpu].pfm_ovfl_intr_count, 5617 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, 5618 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, 5619 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, 5620 cpu, pfm_stats[cpu].pfm_smpl_handler_calls, 5621 cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, 5622 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, 5623 cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, 5624 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, 5625 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, 5626 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, 5627 cpu, pfm_get_cpu_data(pmu_owner, cpu) ? 
pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, 5628 cpu, pfm_get_cpu_data(pmu_ctx, cpu), 5629 cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); 5630 5631 if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { 5632 5633 psr = pfm_get_psr(); 5634 5635 ia64_srlz_d(); 5636 5637 seq_printf(m, 5638 "CPU%-2d psr : 0x%lx\n" 5639 "CPU%-2d pmc0 : 0x%lx\n", 5640 cpu, psr, 5641 cpu, ia64_get_pmc(0)); 5642 5643 for (i=0; PMC_IS_LAST(i) == 0; i++) { 5644 if (PMC_IS_COUNTING(i) == 0) continue; 5645 seq_printf(m, 5646 "CPU%-2d pmc%u : 0x%lx\n" 5647 "CPU%-2d pmd%u : 0x%lx\n", 5648 cpu, i, ia64_get_pmc(i), 5649 cpu, i, ia64_get_pmd(i)); 5650 } 5651 } 5652 return 0; 5653 } 5654 5655 const struct seq_operations pfm_seq_ops = { 5656 .start = pfm_proc_start, 5657 .next = pfm_proc_next, 5658 .stop = pfm_proc_stop, 5659 .show = pfm_proc_show 5660 }; 5661 5662 /* 5663 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens 5664 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring 5665 * is active or inactive based on mode. We must rely on the value in 5666 * local_cpu_data->pfm_syst_info 5667 */ 5668 void 5669 pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) 5670 { 5671 struct pt_regs *regs; 5672 unsigned long dcr; 5673 unsigned long dcr_pp; 5674 5675 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; 5676 5677 /* 5678 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 5679 * on every CPU, so we can rely on the pid to identify the idle task. 5680 */ 5681 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { 5682 regs = task_pt_regs(task); 5683 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; 5684 return; 5685 } 5686 /* 5687 * if monitoring has started 5688 */ 5689 if (dcr_pp) { 5690 dcr = ia64_getreg(_IA64_REG_CR_DCR); 5691 /* 5692 * context switching in? 5693 */ 5694 if (is_ctxswin) { 5695 /* mask monitoring for the idle task */ 5696 ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); 5697 pfm_clear_psr_pp(); 5698 ia64_srlz_i(); 5699 return; 5700 } 5701 /* 5702 * context switching out 5703 * restore monitoring for next task 5704 * 5705 * Due to inlining this odd if-then-else construction generates 5706 * better code. 5707 */ 5708 ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); 5709 pfm_set_psr_pp(); 5710 ia64_srlz_i(); 5711 } 5712 } 5713 5714 #ifdef CONFIG_SMP 5715 5716 static void 5717 pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) 5718 { 5719 struct task_struct *task = ctx->ctx_task; 5720 5721 ia64_psr(regs)->up = 0; 5722 ia64_psr(regs)->sp = 1; 5723 5724 if (GET_PMU_OWNER() == task) { 5725 DPRINT(("cleared ownership for [%d]\n", 5726 task_pid_nr(ctx->ctx_task))); 5727 SET_PMU_OWNER(NULL, NULL); 5728 } 5729 5730 /* 5731 * disconnect the task from the context and vice-versa 5732 */ 5733 PFM_SET_WORK_PENDING(task, 0); 5734 5735 task->thread.pfm_context = NULL; 5736 task->thread.flags &= ~IA64_THREAD_PM_VALID; 5737 5738 DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); 5739 } 5740 5741 5742 /* 5743 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5744 */ 5745 void 5746 pfm_save_regs(struct task_struct *task) 5747 { 5748 pfm_context_t *ctx; 5749 unsigned long flags; 5750 u64 psr; 5751 5752 5753 ctx = PFM_GET_CTX(task); 5754 if (ctx == NULL) return; 5755 5756 /* 5757 * we always come here with interrupts ALREADY disabled by 5758 * the scheduler. So we simply need to protect against concurrent 5759 * access, not CPU concurrency. 
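 * (which is why the lighter pfm_protect_ctx_ctxsw()/pfm_unprotect_ctx_ctxsw()
 * pair is used below rather than PROTECT_CTX()/UNPROTECT_CTX(); interrupts
 * remain masked for the whole context switch path)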
5760 */ 5761 flags = pfm_protect_ctx_ctxsw(ctx); 5762 5763 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5764 struct pt_regs *regs = task_pt_regs(task); 5765 5766 pfm_clear_psr_up(); 5767 5768 pfm_force_cleanup(ctx, regs); 5769 5770 BUG_ON(ctx->ctx_smpl_hdr); 5771 5772 pfm_unprotect_ctx_ctxsw(ctx, flags); 5773 5774 pfm_context_free(ctx); 5775 return; 5776 } 5777 5778 /* 5779 * save current PSR: needed because we modify it 5780 */ 5781 ia64_srlz_d(); 5782 psr = pfm_get_psr(); 5783 5784 BUG_ON(psr & (IA64_PSR_I)); 5785 5786 /* 5787 * stop monitoring: 5788 * This is the last instruction which may generate an overflow 5789 * 5790 * We do not need to set psr.sp because, it is irrelevant in kernel. 5791 * It will be restored from ipsr when going back to user level 5792 */ 5793 pfm_clear_psr_up(); 5794 5795 /* 5796 * keep a copy of psr.up (for reload) 5797 */ 5798 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5799 5800 /* 5801 * release ownership of this PMU. 5802 * PM interrupts are masked, so nothing 5803 * can happen. 5804 */ 5805 SET_PMU_OWNER(NULL, NULL); 5806 5807 /* 5808 * we systematically save the PMD as we have no 5809 * guarantee we will be schedule at that same 5810 * CPU again. 5811 */ 5812 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5813 5814 /* 5815 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5816 * we will need it on the restore path to check 5817 * for pending overflow. 5818 */ 5819 ctx->th_pmcs[0] = ia64_get_pmc(0); 5820 5821 /* 5822 * unfreeze PMU if had pending overflows 5823 */ 5824 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5825 5826 /* 5827 * finally, allow context access. 5828 * interrupts will still be masked after this call. 5829 */ 5830 pfm_unprotect_ctx_ctxsw(ctx, flags); 5831 } 5832 5833 #else /* !CONFIG_SMP */ 5834 void 5835 pfm_save_regs(struct task_struct *task) 5836 { 5837 pfm_context_t *ctx; 5838 u64 psr; 5839 5840 ctx = PFM_GET_CTX(task); 5841 if (ctx == NULL) return; 5842 5843 /* 5844 * save current PSR: needed because we modify it 5845 */ 5846 psr = pfm_get_psr(); 5847 5848 BUG_ON(psr & (IA64_PSR_I)); 5849 5850 /* 5851 * stop monitoring: 5852 * This is the last instruction which may generate an overflow 5853 * 5854 * We do not need to set psr.sp because, it is irrelevant in kernel. 5855 * It will be restored from ipsr when going back to user level 5856 */ 5857 pfm_clear_psr_up(); 5858 5859 /* 5860 * keep a copy of psr.up (for reload) 5861 */ 5862 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5863 } 5864 5865 static void 5866 pfm_lazy_save_regs (struct task_struct *task) 5867 { 5868 pfm_context_t *ctx; 5869 unsigned long flags; 5870 5871 { u64 psr = pfm_get_psr(); 5872 BUG_ON(psr & IA64_PSR_UP); 5873 } 5874 5875 ctx = PFM_GET_CTX(task); 5876 5877 /* 5878 * we need to mask PMU overflow here to 5879 * make sure that we maintain pmc0 until 5880 * we save it. overflow interrupts are 5881 * treated as spurious if there is no 5882 * owner. 5883 * 5884 * XXX: I don't think this is necessary 5885 */ 5886 PROTECT_CTX(ctx,flags); 5887 5888 /* 5889 * release ownership of this PMU. 5890 * must be done before we save the registers. 5891 * 5892 * after this call any PMU interrupt is treated 5893 * as spurious. 
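 * (see pfm_do_interrupt_handler(): with no registered owner the interrupt
 * is only accounted in pfm_spurious_ovfl_intr_count and the PMU is
 * unfrozen, so no state is lost)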
5894 */ 5895 SET_PMU_OWNER(NULL, NULL); 5896 5897 /* 5898 * save all the pmds we use 5899 */ 5900 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5901 5902 /* 5903 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5904 * it is needed to check for pended overflow 5905 * on the restore path 5906 */ 5907 ctx->th_pmcs[0] = ia64_get_pmc(0); 5908 5909 /* 5910 * unfreeze PMU if had pending overflows 5911 */ 5912 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5913 5914 /* 5915 * now get can unmask PMU interrupts, they will 5916 * be treated as purely spurious and we will not 5917 * lose any information 5918 */ 5919 UNPROTECT_CTX(ctx,flags); 5920 } 5921 #endif /* CONFIG_SMP */ 5922 5923 #ifdef CONFIG_SMP 5924 /* 5925 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5926 */ 5927 void 5928 pfm_load_regs (struct task_struct *task) 5929 { 5930 pfm_context_t *ctx; 5931 unsigned long pmc_mask = 0UL, pmd_mask = 0UL; 5932 unsigned long flags; 5933 u64 psr, psr_up; 5934 int need_irq_resend; 5935 5936 ctx = PFM_GET_CTX(task); 5937 if (unlikely(ctx == NULL)) return; 5938 5939 BUG_ON(GET_PMU_OWNER()); 5940 5941 /* 5942 * possible on unload 5943 */ 5944 if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; 5945 5946 /* 5947 * we always come here with interrupts ALREADY disabled by 5948 * the scheduler. So we simply need to protect against concurrent 5949 * access, not CPU concurrency. 5950 */ 5951 flags = pfm_protect_ctx_ctxsw(ctx); 5952 psr = pfm_get_psr(); 5953 5954 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 5955 5956 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 5957 BUG_ON(psr & IA64_PSR_I); 5958 5959 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { 5960 struct pt_regs *regs = task_pt_regs(task); 5961 5962 BUG_ON(ctx->ctx_smpl_hdr); 5963 5964 pfm_force_cleanup(ctx, regs); 5965 5966 pfm_unprotect_ctx_ctxsw(ctx, flags); 5967 5968 /* 5969 * this one (kmalloc'ed) is fine with interrupts disabled 5970 */ 5971 pfm_context_free(ctx); 5972 5973 return; 5974 } 5975 5976 /* 5977 * we restore ALL the debug registers to avoid picking up 5978 * stale state. 5979 */ 5980 if (ctx->ctx_fl_using_dbreg) { 5981 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 5982 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 5983 } 5984 /* 5985 * retrieve saved psr.up 5986 */ 5987 psr_up = ctx->ctx_saved_psr_up; 5988 5989 /* 5990 * if we were the last user of the PMU on that CPU, 5991 * then nothing to do except restore psr 5992 */ 5993 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { 5994 5995 /* 5996 * retrieve partial reload masks (due to user modifications) 5997 */ 5998 pmc_mask = ctx->ctx_reload_pmcs[0]; 5999 pmd_mask = ctx->ctx_reload_pmds[0]; 6000 6001 } else { 6002 /* 6003 * To avoid leaking information to the user level when psr.sp=0, 6004 * we must reload ALL implemented pmds (even the ones we don't use). 6005 * In the kernel we only allow PFM_READ_PMDS on registers which 6006 * we initialized or requested (sampling) so there is no risk there. 6007 */ 6008 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6009 6010 /* 6011 * ALL accessible PMCs are systematically reloaded, unused registers 6012 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6013 * up stale configuration. 6014 * 6015 * PMC0 is never in the mask. It is always restored separately. 
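 * (pmc0 is handled by the PMC0_HAS_OVFL() check below: if the saved pmc0
 * still carries overflow bits it is written back so that the pending
 * interrupt is replayed, using ia64_resend_irq() when the PMU requires it)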
6016 */ 6017 pmc_mask = ctx->ctx_all_pmcs[0]; 6018 } 6019 /* 6020 * when context is MASKED, we will restore PMC with plm=0 6021 * and PMD with stale information, but that's ok, nothing 6022 * will be captured. 6023 * 6024 * XXX: optimize here 6025 */ 6026 if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6027 if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6028 6029 /* 6030 * check for pending overflow at the time the state 6031 * was saved. 6032 */ 6033 if (unlikely(PMC0_HAS_OVFL(ctx->th_pmcs[0]))) { 6034 /* 6035 * reload pmc0 with the overflow information 6036 * On McKinley PMU, this will trigger a PMU interrupt 6037 */ 6038 ia64_set_pmc(0, ctx->th_pmcs[0]); 6039 ia64_srlz_d(); 6040 ctx->th_pmcs[0] = 0UL; 6041 6042 /* 6043 * will replay the PMU interrupt 6044 */ 6045 if (need_irq_resend) ia64_resend_irq(IA64_PERFMON_VECTOR); 6046 6047 pfm_stats[smp_processor_id()].pfm_replay_ovfl_intr_count++; 6048 } 6049 6050 /* 6051 * we just did a reload, so we reset the partial reload fields 6052 */ 6053 ctx->ctx_reload_pmcs[0] = 0UL; 6054 ctx->ctx_reload_pmds[0] = 0UL; 6055 6056 SET_LAST_CPU(ctx, smp_processor_id()); 6057 6058 /* 6059 * dump activation value for this PMU 6060 */ 6061 INC_ACTIVATION(); 6062 /* 6063 * record current activation for this context 6064 */ 6065 SET_ACTIVATION(ctx); 6066 6067 /* 6068 * establish new ownership. 6069 */ 6070 SET_PMU_OWNER(task, ctx); 6071 6072 /* 6073 * restore the psr.up bit. measurement 6074 * is active again. 6075 * no PMU interrupt can happen at this point 6076 * because we still have interrupts disabled. 6077 */ 6078 if (likely(psr_up)) pfm_set_psr_up(); 6079 6080 /* 6081 * allow concurrent access to context 6082 */ 6083 pfm_unprotect_ctx_ctxsw(ctx, flags); 6084 } 6085 #else /* !CONFIG_SMP */ 6086 /* 6087 * reload PMU state for UP kernels 6088 * in 2.5 we come here with interrupts disabled 6089 */ 6090 void 6091 pfm_load_regs (struct task_struct *task) 6092 { 6093 pfm_context_t *ctx; 6094 struct task_struct *owner; 6095 unsigned long pmd_mask, pmc_mask; 6096 u64 psr, psr_up; 6097 int need_irq_resend; 6098 6099 owner = GET_PMU_OWNER(); 6100 ctx = PFM_GET_CTX(task); 6101 psr = pfm_get_psr(); 6102 6103 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6104 BUG_ON(psr & IA64_PSR_I); 6105 6106 /* 6107 * we restore ALL the debug registers to avoid picking up 6108 * stale state. 6109 * 6110 * This must be done even when the task is still the owner 6111 * as the registers may have been modified via ptrace() 6112 * (not perfmon) by the previous task. 6113 */ 6114 if (ctx->ctx_fl_using_dbreg) { 6115 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6116 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6117 } 6118 6119 /* 6120 * retrieved saved psr.up 6121 */ 6122 psr_up = ctx->ctx_saved_psr_up; 6123 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6124 6125 /* 6126 * short path, our state is still there, just 6127 * need to restore psr and we go 6128 * 6129 * we do not touch either PMC nor PMD. the psr is not touched 6130 * by the overflow_handler. So we are safe w.r.t. to interrupt 6131 * concurrency even without interrupt masking. 6132 */ 6133 if (likely(owner == task)) { 6134 if (likely(psr_up)) pfm_set