1 /* 2 * This file implements the perfmon-2 subsystem which is used 3 * to program the IA-64 Performance Monitoring Unit (PMU). 4 * 5 * The initial version of perfmon.c was written by 6 * Ganesh Venkitachalam, IBM Corp. 7 * 8 * Then it was modified for perfmon-1.x by Stephane Eranian and 9 * David Mosberger, Hewlett Packard Co. 10 * 11 * Version Perfmon-2.x is a rewrite of perfmon-1.x 12 * by Stephane Eranian, Hewlett Packard Co. 13 * 14 * Copyright (C) 1999-2005 Hewlett Packard Co 15 * Stephane Eranian <eranian@hpl.hp.com> 16 * David Mosberger-Tang <davidm@hpl.hp.com> 17 * 18 * More information about perfmon available at: 19 * http://www.hpl.hp.com/research/linux/perfmon 20 */ 21 22 #include <linux/module.h> 23 #include <linux/kernel.h> 24 #include <linux/sched.h> 25 #include <linux/sched/task.h> 26 #include <linux/sched/task_stack.h> 27 #include <linux/interrupt.h> 28 #include <linux/proc_fs.h> 29 #include <linux/seq_file.h> 30 #include <linux/init.h> 31 #include <linux/vmalloc.h> 32 #include <linux/mm.h> 33 #include <linux/sysctl.h> 34 #include <linux/list.h> 35 #include <linux/file.h> 36 #include <linux/poll.h> 37 #include <linux/vfs.h> 38 #include <linux/smp.h> 39 #include <linux/pagemap.h> 40 #include <linux/mount.h> 41 #include <linux/bitops.h> 42 #include <linux/capability.h> 43 #include <linux/rcupdate.h> 44 #include <linux/completion.h> 45 #include <linux/tracehook.h> 46 #include <linux/slab.h> 47 #include <linux/cpu.h> 48 49 #include <asm/errno.h> 50 #include <asm/intrinsics.h> 51 #include <asm/page.h> 52 #include <asm/perfmon.h> 53 #include <asm/processor.h> 54 #include <asm/signal.h> 55 #include <linux/uaccess.h> 56 #include <asm/delay.h> 57 58 #ifdef CONFIG_PERFMON 59 /* 60 * perfmon context state 61 */ 62 #define PFM_CTX_UNLOADED 1 /* context is not loaded onto any task */ 63 #define PFM_CTX_LOADED 2 /* context is loaded onto a task */ 64 #define PFM_CTX_MASKED 3 /* context is loaded but monitoring is masked due to overflow */ 65 #define PFM_CTX_ZOMBIE 4 /* owner of the context is closing it */ 66 67 #define PFM_INVALID_ACTIVATION (~0UL) 68 69 #define PFM_NUM_PMC_REGS 64 /* PMC save area for ctxsw */ 70 #define PFM_NUM_PMD_REGS 64 /* PMD save area for ctxsw */ 71 72 /* 73 * depth of message queue 74 */ 75 #define PFM_MAX_MSGS 32 76 #define PFM_CTXQ_EMPTY(g) ((g)->ctx_msgq_head == (g)->ctx_msgq_tail) 77 78 /* 79 * type of a PMU register (bitmask). 
80 * bitmask structure: 81 * bit0 : register implemented 82 * bit1 : end marker 83 * bit2-3 : reserved 84 * bit4 : pmc has pmc.pm 85 * bit5 : pmc controls a counter (has pmc.oi), pmd is used as counter 86 * bit6-7 : register type 87 * bit8-31: reserved 88 */ 89 #define PFM_REG_NOTIMPL 0x0 /* not implemented at all */ 90 #define PFM_REG_IMPL 0x1 /* register implemented */ 91 #define PFM_REG_END 0x2 /* end marker */ 92 #define PFM_REG_MONITOR (0x1<<4|PFM_REG_IMPL) /* a PMC with a pmc.pm field only */ 93 #define PFM_REG_COUNTING (0x2<<4|PFM_REG_MONITOR) /* a monitor + pmc.oi+ PMD used as a counter */ 94 #define PFM_REG_CONTROL (0x4<<4|PFM_REG_IMPL) /* PMU control register */ 95 #define PFM_REG_CONFIG (0x8<<4|PFM_REG_IMPL) /* configuration register */ 96 #define PFM_REG_BUFFER (0xc<<4|PFM_REG_IMPL) /* PMD used as buffer */ 97 98 #define PMC_IS_LAST(i) (pmu_conf->pmc_desc[i].type & PFM_REG_END) 99 #define PMD_IS_LAST(i) (pmu_conf->pmd_desc[i].type & PFM_REG_END) 100 101 #define PMC_OVFL_NOTIFY(ctx, i) ((ctx)->ctx_pmds[i].flags & PFM_REGFL_OVFL_NOTIFY) 102 103 /* i assumed unsigned */ 104 #define PMC_IS_IMPL(i) (i< PMU_MAX_PMCS && (pmu_conf->pmc_desc[i].type & PFM_REG_IMPL)) 105 #define PMD_IS_IMPL(i) (i< PMU_MAX_PMDS && (pmu_conf->pmd_desc[i].type & PFM_REG_IMPL)) 106 107 /* XXX: these assume that register i is implemented */ 108 #define PMD_IS_COUNTING(i) ((pmu_conf->pmd_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 109 #define PMC_IS_COUNTING(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_COUNTING) == PFM_REG_COUNTING) 110 #define PMC_IS_MONITOR(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_MONITOR) == PFM_REG_MONITOR) 111 #define PMC_IS_CONTROL(i) ((pmu_conf->pmc_desc[i].type & PFM_REG_CONTROL) == PFM_REG_CONTROL) 112 113 #define PMC_DFL_VAL(i) pmu_conf->pmc_desc[i].default_value 114 #define PMC_RSVD_MASK(i) pmu_conf->pmc_desc[i].reserved_mask 115 #define PMD_PMD_DEP(i) pmu_conf->pmd_desc[i].dep_pmd[0] 116 #define PMC_PMD_DEP(i) pmu_conf->pmc_desc[i].dep_pmd[0] 117 118 #define PFM_NUM_IBRS IA64_NUM_DBG_REGS 119 #define PFM_NUM_DBRS IA64_NUM_DBG_REGS 120 121 #define CTX_OVFL_NOBLOCK(c) ((c)->ctx_fl_block == 0) 122 #define CTX_HAS_SMPL(c) ((c)->ctx_fl_is_sampling) 123 #define PFM_CTX_TASK(h) (h)->ctx_task 124 125 #define PMU_PMC_OI 5 /* position of pmc.oi bit */ 126 127 /* XXX: does not support more than 64 PMDs */ 128 #define CTX_USED_PMD(ctx, mask) (ctx)->ctx_used_pmds[0] |= (mask) 129 #define CTX_IS_USED_PMD(ctx, c) (((ctx)->ctx_used_pmds[0] & (1UL << (c))) != 0UL) 130 131 #define CTX_USED_MONITOR(ctx, mask) (ctx)->ctx_used_monitors[0] |= (mask) 132 133 #define CTX_USED_IBR(ctx,n) (ctx)->ctx_used_ibrs[(n)>>6] |= 1UL<< ((n) % 64) 134 #define CTX_USED_DBR(ctx,n) (ctx)->ctx_used_dbrs[(n)>>6] |= 1UL<< ((n) % 64) 135 #define CTX_USES_DBREGS(ctx) (((pfm_context_t *)(ctx))->ctx_fl_using_dbreg==1) 136 #define PFM_CODE_RR 0 /* requesting code range restriction */ 137 #define PFM_DATA_RR 1 /* requestion data range restriction */ 138 139 #define PFM_CPUINFO_CLEAR(v) pfm_get_cpu_var(pfm_syst_info) &= ~(v) 140 #define PFM_CPUINFO_SET(v) pfm_get_cpu_var(pfm_syst_info) |= (v) 141 #define PFM_CPUINFO_GET() pfm_get_cpu_var(pfm_syst_info) 142 143 #define RDEP(x) (1UL<<(x)) 144 145 /* 146 * context protection macros 147 * in SMP: 148 * - we need to protect against CPU concurrency (spin_lock) 149 * - we need to protect against PMU overflow interrupts (local_irq_disable) 150 * in UP: 151 * - we need to protect against PMU overflow interrupts (local_irq_disable) 152 * 153 * 
spin_lock_irqsave()/spin_unlock_irqrestore(): 154 * in SMP: local_irq_disable + spin_lock 155 * in UP : local_irq_disable 156 * 157 * spin_lock()/spin_lock(): 158 * in UP : removed automatically 159 * in SMP: protect against context accesses from other CPU. interrupts 160 * are not masked. This is useful for the PMU interrupt handler 161 * because we know we will not get PMU concurrency in that code. 162 */ 163 #define PROTECT_CTX(c, f) \ 164 do { \ 165 DPRINT(("spinlock_irq_save ctx %p by [%d]\n", c, task_pid_nr(current))); \ 166 spin_lock_irqsave(&(c)->ctx_lock, f); \ 167 DPRINT(("spinlocked ctx %p by [%d]\n", c, task_pid_nr(current))); \ 168 } while(0) 169 170 #define UNPROTECT_CTX(c, f) \ 171 do { \ 172 DPRINT(("spinlock_irq_restore ctx %p by [%d]\n", c, task_pid_nr(current))); \ 173 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 174 } while(0) 175 176 #define PROTECT_CTX_NOPRINT(c, f) \ 177 do { \ 178 spin_lock_irqsave(&(c)->ctx_lock, f); \ 179 } while(0) 180 181 182 #define UNPROTECT_CTX_NOPRINT(c, f) \ 183 do { \ 184 spin_unlock_irqrestore(&(c)->ctx_lock, f); \ 185 } while(0) 186 187 188 #define PROTECT_CTX_NOIRQ(c) \ 189 do { \ 190 spin_lock(&(c)->ctx_lock); \ 191 } while(0) 192 193 #define UNPROTECT_CTX_NOIRQ(c) \ 194 do { \ 195 spin_unlock(&(c)->ctx_lock); \ 196 } while(0) 197 198 199 #ifdef CONFIG_SMP 200 201 #define GET_ACTIVATION() pfm_get_cpu_var(pmu_activation_number) 202 #define INC_ACTIVATION() pfm_get_cpu_var(pmu_activation_number)++ 203 #define SET_ACTIVATION(c) (c)->ctx_last_activation = GET_ACTIVATION() 204 205 #else /* !CONFIG_SMP */ 206 #define SET_ACTIVATION(t) do {} while(0) 207 #define GET_ACTIVATION(t) do {} while(0) 208 #define INC_ACTIVATION(t) do {} while(0) 209 #endif /* CONFIG_SMP */ 210 211 #define SET_PMU_OWNER(t, c) do { pfm_get_cpu_var(pmu_owner) = (t); pfm_get_cpu_var(pmu_ctx) = (c); } while(0) 212 #define GET_PMU_OWNER() pfm_get_cpu_var(pmu_owner) 213 #define GET_PMU_CTX() pfm_get_cpu_var(pmu_ctx) 214 215 #define LOCK_PFS(g) spin_lock_irqsave(&pfm_sessions.pfs_lock, g) 216 #define UNLOCK_PFS(g) spin_unlock_irqrestore(&pfm_sessions.pfs_lock, g) 217 218 #define PFM_REG_RETFLAG_SET(flags, val) do { flags &= ~PFM_REG_RETFL_MASK; flags |= (val); } while(0) 219 220 /* 221 * cmp0 must be the value of pmc0 222 */ 223 #define PMC0_HAS_OVFL(cmp0) (cmp0 & ~0x1UL) 224 225 #define PFMFS_MAGIC 0xa0b4d889 226 227 /* 228 * debugging 229 */ 230 #define PFM_DEBUGGING 1 231 #ifdef PFM_DEBUGGING 232 #define DPRINT(a) \ 233 do { \ 234 if (unlikely(pfm_sysctl.debug >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 235 } while (0) 236 237 #define DPRINT_ovfl(a) \ 238 do { \ 239 if (unlikely(pfm_sysctl.debug > 0 && pfm_sysctl.debug_ovfl >0)) { printk("%s.%d: CPU%d [%d] ", __func__, __LINE__, smp_processor_id(), task_pid_nr(current)); printk a; } \ 240 } while (0) 241 #endif 242 243 /* 244 * 64-bit software counter structure 245 * 246 * the next_reset_type is applied to the next call to pfm_reset_regs() 247 */ 248 typedef struct { 249 unsigned long val; /* virtual 64bit counter value */ 250 unsigned long lval; /* last reset value */ 251 unsigned long long_reset; /* reset value on sampling overflow */ 252 unsigned long short_reset; /* reset value on overflow */ 253 unsigned long reset_pmds[4]; /* which other pmds to reset when this counter overflows */ 254 unsigned long smpl_pmds[4]; /* which pmds are accessed when counter overflow */ 255 unsigned long seed; /* seed for random-number generator */ 256 unsigned long 
mask; /* mask for random-number generator */ 257 unsigned int flags; /* notify/do not notify */ 258 unsigned long eventid; /* overflow event identifier */ 259 } pfm_counter_t; 260 261 /* 262 * context flags 263 */ 264 typedef struct { 265 unsigned int block:1; /* when 1, task will blocked on user notifications */ 266 unsigned int system:1; /* do system wide monitoring */ 267 unsigned int using_dbreg:1; /* using range restrictions (debug registers) */ 268 unsigned int is_sampling:1; /* true if using a custom format */ 269 unsigned int excl_idle:1; /* exclude idle task in system wide session */ 270 unsigned int going_zombie:1; /* context is zombie (MASKED+blocking) */ 271 unsigned int trap_reason:2; /* reason for going into pfm_handle_work() */ 272 unsigned int no_msg:1; /* no message sent on overflow */ 273 unsigned int can_restart:1; /* allowed to issue a PFM_RESTART */ 274 unsigned int reserved:22; 275 } pfm_context_flags_t; 276 277 #define PFM_TRAP_REASON_NONE 0x0 /* default value */ 278 #define PFM_TRAP_REASON_BLOCK 0x1 /* we need to block on overflow */ 279 #define PFM_TRAP_REASON_RESET 0x2 /* we need to reset PMDs */ 280 281 282 /* 283 * perfmon context: encapsulates all the state of a monitoring session 284 */ 285 286 typedef struct pfm_context { 287 spinlock_t ctx_lock; /* context protection */ 288 289 pfm_context_flags_t ctx_flags; /* bitmask of flags (block reason incl.) */ 290 unsigned int ctx_state; /* state: active/inactive (no bitfield) */ 291 292 struct task_struct *ctx_task; /* task to which context is attached */ 293 294 unsigned long ctx_ovfl_regs[4]; /* which registers overflowed (notification) */ 295 296 struct completion ctx_restart_done; /* use for blocking notification mode */ 297 298 unsigned long ctx_used_pmds[4]; /* bitmask of PMD used */ 299 unsigned long ctx_all_pmds[4]; /* bitmask of all accessible PMDs */ 300 unsigned long ctx_reload_pmds[4]; /* bitmask of force reload PMD on ctxsw in */ 301 302 unsigned long ctx_all_pmcs[4]; /* bitmask of all accessible PMCs */ 303 unsigned long ctx_reload_pmcs[4]; /* bitmask of force reload PMC on ctxsw in */ 304 unsigned long ctx_used_monitors[4]; /* bitmask of monitor PMC being used */ 305 306 unsigned long ctx_pmcs[PFM_NUM_PMC_REGS]; /* saved copies of PMC values */ 307 308 unsigned int ctx_used_ibrs[1]; /* bitmask of used IBR (speedup ctxsw in) */ 309 unsigned int ctx_used_dbrs[1]; /* bitmask of used DBR (speedup ctxsw in) */ 310 unsigned long ctx_dbrs[IA64_NUM_DBG_REGS]; /* DBR values (cache) when not loaded */ 311 unsigned long ctx_ibrs[IA64_NUM_DBG_REGS]; /* IBR values (cache) when not loaded */ 312 313 pfm_counter_t ctx_pmds[PFM_NUM_PMD_REGS]; /* software state for PMDS */ 314 315 unsigned long th_pmcs[PFM_NUM_PMC_REGS]; /* PMC thread save state */ 316 unsigned long th_pmds[PFM_NUM_PMD_REGS]; /* PMD thread save state */ 317 318 unsigned long ctx_saved_psr_up; /* only contains psr.up value */ 319 320 unsigned long ctx_last_activation; /* context last activation number for last_cpu */ 321 unsigned int ctx_last_cpu; /* CPU id of current or last CPU used (SMP only) */ 322 unsigned int ctx_cpu; /* cpu to which perfmon is applied (system wide) */ 323 324 int ctx_fd; /* file descriptor used my this context */ 325 pfm_ovfl_arg_t ctx_ovfl_arg; /* argument to custom buffer format handler */ 326 327 pfm_buffer_fmt_t *ctx_buf_fmt; /* buffer format callbacks */ 328 void *ctx_smpl_hdr; /* points to sampling buffer header kernel vaddr */ 329 unsigned long ctx_smpl_size; /* size of sampling buffer */ 330 void *ctx_smpl_vaddr; /* user 
level virtual address of smpl buffer */ 331 332 wait_queue_head_t ctx_msgq_wait; 333 pfm_msg_t ctx_msgq[PFM_MAX_MSGS]; 334 int ctx_msgq_head; 335 int ctx_msgq_tail; 336 struct fasync_struct *ctx_async_queue; 337 338 wait_queue_head_t ctx_zombieq; /* termination cleanup wait queue */ 339 } pfm_context_t; 340 341 /* 342 * magic number used to verify that structure is really 343 * a perfmon context 344 */ 345 #define PFM_IS_FILE(f) ((f)->f_op == &pfm_file_ops) 346 347 #define PFM_GET_CTX(t) ((pfm_context_t *)(t)->thread.pfm_context) 348 349 #ifdef CONFIG_SMP 350 #define SET_LAST_CPU(ctx, v) (ctx)->ctx_last_cpu = (v) 351 #define GET_LAST_CPU(ctx) (ctx)->ctx_last_cpu 352 #else 353 #define SET_LAST_CPU(ctx, v) do {} while(0) 354 #define GET_LAST_CPU(ctx) do {} while(0) 355 #endif 356 357 358 #define ctx_fl_block ctx_flags.block 359 #define ctx_fl_system ctx_flags.system 360 #define ctx_fl_using_dbreg ctx_flags.using_dbreg 361 #define ctx_fl_is_sampling ctx_flags.is_sampling 362 #define ctx_fl_excl_idle ctx_flags.excl_idle 363 #define ctx_fl_going_zombie ctx_flags.going_zombie 364 #define ctx_fl_trap_reason ctx_flags.trap_reason 365 #define ctx_fl_no_msg ctx_flags.no_msg 366 #define ctx_fl_can_restart ctx_flags.can_restart 367 368 #define PFM_SET_WORK_PENDING(t, v) do { (t)->thread.pfm_needs_checking = v; } while(0); 369 #define PFM_GET_WORK_PENDING(t) (t)->thread.pfm_needs_checking 370 371 /* 372 * global information about all sessions 373 * mostly used to synchronize between system wide and per-process 374 */ 375 typedef struct { 376 spinlock_t pfs_lock; /* lock the structure */ 377 378 unsigned int pfs_task_sessions; /* number of per task sessions */ 379 unsigned int pfs_sys_sessions; /* number of per system wide sessions */ 380 unsigned int pfs_sys_use_dbregs; /* incremented when a system wide session uses debug regs */ 381 unsigned int pfs_ptrace_use_dbregs; /* incremented when a process uses debug regs */ 382 struct task_struct *pfs_sys_session[NR_CPUS]; /* point to task owning a system-wide session */ 383 } pfm_session_t; 384 385 /* 386 * information about a PMC or PMD. 387 * dep_pmd[]: a bitmask of dependent PMD registers 388 * dep_pmc[]: a bitmask of dependent PMC registers 389 */ 390 typedef int (*pfm_reg_check_t)(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); 391 typedef struct { 392 unsigned int type; 393 int pm_pos; 394 unsigned long default_value; /* power-on default value */ 395 unsigned long reserved_mask; /* bitmask of reserved bits */ 396 pfm_reg_check_t read_check; 397 pfm_reg_check_t write_check; 398 unsigned long dep_pmd[4]; 399 unsigned long dep_pmc[4]; 400 } pfm_reg_desc_t; 401 402 /* assume cnum is a valid monitor */ 403 #define PMC_PM(cnum, val) (((val) >> (pmu_conf->pmc_desc[cnum].pm_pos)) & 0x1) 404 405 /* 406 * This structure is initialized at boot time and contains 407 * a description of the PMU main characteristics. 
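 * One such structure is provided per supported PMU model by the perfmon_*.h
 * headers included further down and collected in the pmu_confs[] table; the
 * boot-time probe code walks that table and uses the first matching entry.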
408 * 409 * If the probe function is defined, detection is based 410 * on its return value: 411 * - 0 means recognized PMU 412 * - anything else means not supported 413 * When the probe function is not defined, then the pmu_family field 414 * is used and it must match the host CPU family such that: 415 * - cpu->family & config->pmu_family != 0 416 */ 417 typedef struct { 418 unsigned long ovfl_val; /* overflow value for counters */ 419 420 pfm_reg_desc_t *pmc_desc; /* detailed PMC register dependencies descriptions */ 421 pfm_reg_desc_t *pmd_desc; /* detailed PMD register dependencies descriptions */ 422 423 unsigned int num_pmcs; /* number of PMCS: computed at init time */ 424 unsigned int num_pmds; /* number of PMDS: computed at init time */ 425 unsigned long impl_pmcs[4]; /* bitmask of implemented PMCS */ 426 unsigned long impl_pmds[4]; /* bitmask of implemented PMDS */ 427 428 char *pmu_name; /* PMU family name */ 429 unsigned int pmu_family; /* cpuid family pattern used to identify pmu */ 430 unsigned int flags; /* pmu specific flags */ 431 unsigned int num_ibrs; /* number of IBRS: computed at init time */ 432 unsigned int num_dbrs; /* number of DBRS: computed at init time */ 433 unsigned int num_counters; /* PMC/PMD counting pairs : computed at init time */ 434 int (*probe)(void); /* customized probe routine */ 435 unsigned int use_rr_dbregs:1; /* set if debug registers used for range restriction */ 436 } pmu_config_t; 437 /* 438 * PMU specific flags 439 */ 440 #define PFM_PMU_IRQ_RESEND 1 /* PMU needs explicit IRQ resend */ 441 442 /* 443 * debug register related type definitions 444 */ 445 typedef struct { 446 unsigned long ibr_mask:56; 447 unsigned long ibr_plm:4; 448 unsigned long ibr_ig:3; 449 unsigned long ibr_x:1; 450 } ibr_mask_reg_t; 451 452 typedef struct { 453 unsigned long dbr_mask:56; 454 unsigned long dbr_plm:4; 455 unsigned long dbr_ig:2; 456 unsigned long dbr_w:1; 457 unsigned long dbr_r:1; 458 } dbr_mask_reg_t; 459 460 typedef union { 461 unsigned long val; 462 ibr_mask_reg_t ibr; 463 dbr_mask_reg_t dbr; 464 } dbreg_t; 465 466 467 /* 468 * perfmon command descriptions 469 */ 470 typedef struct { 471 int (*cmd_func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 472 char *cmd_name; 473 int cmd_flags; 474 unsigned int cmd_narg; 475 size_t cmd_argsize; 476 int (*cmd_getsize)(void *arg, size_t *sz); 477 } pfm_cmd_desc_t; 478 479 #define PFM_CMD_FD 0x01 /* command requires a file descriptor */ 480 #define PFM_CMD_ARG_READ 0x02 /* command must read argument(s) */ 481 #define PFM_CMD_ARG_RW 0x04 /* command must read/write argument(s) */ 482 #define PFM_CMD_STOP 0x08 /* command does not work on zombie context */ 483 484 485 #define PFM_CMD_NAME(cmd) pfm_cmd_tab[(cmd)].cmd_name 486 #define PFM_CMD_READ_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_READ) 487 #define PFM_CMD_RW_ARG(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_ARG_RW) 488 #define PFM_CMD_USE_FD(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_FD) 489 #define PFM_CMD_STOPPED(cmd) (pfm_cmd_tab[(cmd)].cmd_flags & PFM_CMD_STOP) 490 491 #define PFM_CMD_ARG_MANY -1 /* cannot be zero */ 492 493 typedef struct { 494 unsigned long pfm_spurious_ovfl_intr_count; /* keep track of spurious ovfl interrupts */ 495 unsigned long pfm_replay_ovfl_intr_count; /* keep track of replayed ovfl interrupts */ 496 unsigned long pfm_ovfl_intr_count; /* keep track of ovfl interrupts */ 497 unsigned long pfm_ovfl_intr_cycles; /* cycles spent processing ovfl interrupts */ 498 unsigned long pfm_ovfl_intr_cycles_min; /* 
min cycles spent processing ovfl interrupts */ 499 unsigned long pfm_ovfl_intr_cycles_max; /* max cycles spent processing ovfl interrupts */ 500 unsigned long pfm_smpl_handler_calls; 501 unsigned long pfm_smpl_handler_cycles; 502 char pad[SMP_CACHE_BYTES] ____cacheline_aligned; 503 } pfm_stats_t; 504 505 /* 506 * perfmon internal variables 507 */ 508 static pfm_stats_t pfm_stats[NR_CPUS]; 509 static pfm_session_t pfm_sessions; /* global sessions information */ 510 511 static DEFINE_SPINLOCK(pfm_alt_install_check); 512 static pfm_intr_handler_desc_t *pfm_alt_intr_handler; 513 514 static struct proc_dir_entry *perfmon_dir; 515 static pfm_uuid_t pfm_null_uuid = {0,}; 516 517 static spinlock_t pfm_buffer_fmt_lock; 518 static LIST_HEAD(pfm_buffer_fmt_list); 519 520 static pmu_config_t *pmu_conf; 521 522 /* sysctl() controls */ 523 pfm_sysctl_t pfm_sysctl; 524 EXPORT_SYMBOL(pfm_sysctl); 525 526 static struct ctl_table pfm_ctl_table[] = { 527 { 528 .procname = "debug", 529 .data = &pfm_sysctl.debug, 530 .maxlen = sizeof(int), 531 .mode = 0666, 532 .proc_handler = proc_dointvec, 533 }, 534 { 535 .procname = "debug_ovfl", 536 .data = &pfm_sysctl.debug_ovfl, 537 .maxlen = sizeof(int), 538 .mode = 0666, 539 .proc_handler = proc_dointvec, 540 }, 541 { 542 .procname = "fastctxsw", 543 .data = &pfm_sysctl.fastctxsw, 544 .maxlen = sizeof(int), 545 .mode = 0600, 546 .proc_handler = proc_dointvec, 547 }, 548 { 549 .procname = "expert_mode", 550 .data = &pfm_sysctl.expert_mode, 551 .maxlen = sizeof(int), 552 .mode = 0600, 553 .proc_handler = proc_dointvec, 554 }, 555 {} 556 }; 557 static struct ctl_table pfm_sysctl_dir[] = { 558 { 559 .procname = "perfmon", 560 .mode = 0555, 561 .child = pfm_ctl_table, 562 }, 563 {} 564 }; 565 static struct ctl_table pfm_sysctl_root[] = { 566 { 567 .procname = "kernel", 568 .mode = 0555, 569 .child = pfm_sysctl_dir, 570 }, 571 {} 572 }; 573 static struct ctl_table_header *pfm_sysctl_header; 574 575 static int pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 576 577 #define pfm_get_cpu_var(v) __ia64_per_cpu_var(v) 578 #define pfm_get_cpu_data(a,b) per_cpu(a, b) 579 580 static inline void 581 pfm_put_task(struct task_struct *task) 582 { 583 if (task != current) put_task_struct(task); 584 } 585 586 static inline void 587 pfm_reserve_page(unsigned long a) 588 { 589 SetPageReserved(vmalloc_to_page((void *)a)); 590 } 591 static inline void 592 pfm_unreserve_page(unsigned long a) 593 { 594 ClearPageReserved(vmalloc_to_page((void*)a)); 595 } 596 597 static inline unsigned long 598 pfm_protect_ctx_ctxsw(pfm_context_t *x) 599 { 600 spin_lock(&(x)->ctx_lock); 601 return 0UL; 602 } 603 604 static inline void 605 pfm_unprotect_ctx_ctxsw(pfm_context_t *x, unsigned long f) 606 { 607 spin_unlock(&(x)->ctx_lock); 608 } 609 610 /* forward declaration */ 611 static const struct dentry_operations pfmfs_dentry_operations; 612 613 static struct dentry * 614 pfmfs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) 615 { 616 return mount_pseudo(fs_type, "pfm:", NULL, &pfmfs_dentry_operations, 617 PFMFS_MAGIC); 618 } 619 620 static struct file_system_type pfm_fs_type = { 621 .name = "pfmfs", 622 .mount = pfmfs_mount, 623 .kill_sb = kill_anon_super, 624 }; 625 MODULE_ALIAS_FS("pfmfs"); 626 627 DEFINE_PER_CPU(unsigned long, pfm_syst_info); 628 DEFINE_PER_CPU(struct task_struct *, pmu_owner); 629 DEFINE_PER_CPU(pfm_context_t *, pmu_ctx); 630 DEFINE_PER_CPU(unsigned long, pmu_activation_number); 631 
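/*
 * Illustrative sketch, not part of the original file (the helper name is
 * made up): how the per-CPU ownership/activation variables above are
 * typically consulted on a context switch in (SMP configuration; in UP the
 * activation macros compile away). A context may skip a full PMU reload
 * only if it was the last one active on this CPU, i.e. this CPU still
 * points at it and the CPU's activation number has not moved since the
 * context last ran here. The real reload logic lives in pfm_load_regs()
 * further down.
 */
#if 0
static inline int
pfm_pmu_state_still_live(pfm_context_t *ctx)
{
	/* another context took over the PMU on this CPU */
	if (GET_PMU_CTX() != ctx)
		return 0;

	/* the CPU was re-activated since we last ran: registers were clobbered */
	if (ctx->ctx_last_activation != GET_ACTIVATION())
		return 0;

	return 1;	/* PMC/PMD values are still live in the hardware */
}
#endif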
EXPORT_PER_CPU_SYMBOL_GPL(pfm_syst_info); 632 633 634 /* forward declaration */ 635 static const struct file_operations pfm_file_ops; 636 637 /* 638 * forward declarations 639 */ 640 #ifndef CONFIG_SMP 641 static void pfm_lazy_save_regs (struct task_struct *ta); 642 #endif 643 644 void dump_pmu_state(const char *); 645 static int pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 646 647 #include "perfmon_itanium.h" 648 #include "perfmon_mckinley.h" 649 #include "perfmon_montecito.h" 650 #include "perfmon_generic.h" 651 652 static pmu_config_t *pmu_confs[]={ 653 &pmu_conf_mont, 654 &pmu_conf_mck, 655 &pmu_conf_ita, 656 &pmu_conf_gen, /* must be last */ 657 NULL 658 }; 659 660 661 static int pfm_end_notify_user(pfm_context_t *ctx); 662 663 static inline void 664 pfm_clear_psr_pp(void) 665 { 666 ia64_rsm(IA64_PSR_PP); 667 ia64_srlz_i(); 668 } 669 670 static inline void 671 pfm_set_psr_pp(void) 672 { 673 ia64_ssm(IA64_PSR_PP); 674 ia64_srlz_i(); 675 } 676 677 static inline void 678 pfm_clear_psr_up(void) 679 { 680 ia64_rsm(IA64_PSR_UP); 681 ia64_srlz_i(); 682 } 683 684 static inline void 685 pfm_set_psr_up(void) 686 { 687 ia64_ssm(IA64_PSR_UP); 688 ia64_srlz_i(); 689 } 690 691 static inline unsigned long 692 pfm_get_psr(void) 693 { 694 unsigned long tmp; 695 tmp = ia64_getreg(_IA64_REG_PSR); 696 ia64_srlz_i(); 697 return tmp; 698 } 699 700 static inline void 701 pfm_set_psr_l(unsigned long val) 702 { 703 ia64_setreg(_IA64_REG_PSR_L, val); 704 ia64_srlz_i(); 705 } 706 707 static inline void 708 pfm_freeze_pmu(void) 709 { 710 ia64_set_pmc(0,1UL); 711 ia64_srlz_d(); 712 } 713 714 static inline void 715 pfm_unfreeze_pmu(void) 716 { 717 ia64_set_pmc(0,0UL); 718 ia64_srlz_d(); 719 } 720 721 static inline void 722 pfm_restore_ibrs(unsigned long *ibrs, unsigned int nibrs) 723 { 724 int i; 725 726 for (i=0; i < nibrs; i++) { 727 ia64_set_ibr(i, ibrs[i]); 728 ia64_dv_serialize_instruction(); 729 } 730 ia64_srlz_i(); 731 } 732 733 static inline void 734 pfm_restore_dbrs(unsigned long *dbrs, unsigned int ndbrs) 735 { 736 int i; 737 738 for (i=0; i < ndbrs; i++) { 739 ia64_set_dbr(i, dbrs[i]); 740 ia64_dv_serialize_data(); 741 } 742 ia64_srlz_d(); 743 } 744 745 /* 746 * PMD[i] must be a counter. no check is made 747 */ 748 static inline unsigned long 749 pfm_read_soft_counter(pfm_context_t *ctx, int i) 750 { 751 return ctx->ctx_pmds[i].val + (ia64_get_pmd(i) & pmu_conf->ovfl_val); 752 } 753 754 /* 755 * PMD[i] must be a counter. 
no check is made 756 */ 757 static inline void 758 pfm_write_soft_counter(pfm_context_t *ctx, int i, unsigned long val) 759 { 760 unsigned long ovfl_val = pmu_conf->ovfl_val; 761 762 ctx->ctx_pmds[i].val = val & ~ovfl_val; 763 /* 764 * writing to unimplemented part is ignore, so we do not need to 765 * mask off top part 766 */ 767 ia64_set_pmd(i, val & ovfl_val); 768 } 769 770 static pfm_msg_t * 771 pfm_get_new_msg(pfm_context_t *ctx) 772 { 773 int idx, next; 774 775 next = (ctx->ctx_msgq_tail+1) % PFM_MAX_MSGS; 776 777 DPRINT(("ctx_fd=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 778 if (next == ctx->ctx_msgq_head) return NULL; 779 780 idx = ctx->ctx_msgq_tail; 781 ctx->ctx_msgq_tail = next; 782 783 DPRINT(("ctx=%p head=%d tail=%d msg=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, idx)); 784 785 return ctx->ctx_msgq+idx; 786 } 787 788 static pfm_msg_t * 789 pfm_get_next_msg(pfm_context_t *ctx) 790 { 791 pfm_msg_t *msg; 792 793 DPRINT(("ctx=%p head=%d tail=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 794 795 if (PFM_CTXQ_EMPTY(ctx)) return NULL; 796 797 /* 798 * get oldest message 799 */ 800 msg = ctx->ctx_msgq+ctx->ctx_msgq_head; 801 802 /* 803 * and move forward 804 */ 805 ctx->ctx_msgq_head = (ctx->ctx_msgq_head+1) % PFM_MAX_MSGS; 806 807 DPRINT(("ctx=%p head=%d tail=%d type=%d\n", ctx, ctx->ctx_msgq_head, ctx->ctx_msgq_tail, msg->pfm_gen_msg.msg_type)); 808 809 return msg; 810 } 811 812 static void 813 pfm_reset_msgq(pfm_context_t *ctx) 814 { 815 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 816 DPRINT(("ctx=%p msgq reset\n", ctx)); 817 } 818 819 static void * 820 pfm_rvmalloc(unsigned long size) 821 { 822 void *mem; 823 unsigned long addr; 824 825 size = PAGE_ALIGN(size); 826 mem = vzalloc(size); 827 if (mem) { 828 //printk("perfmon: CPU%d pfm_rvmalloc(%ld)=%p\n", smp_processor_id(), size, mem); 829 addr = (unsigned long)mem; 830 while (size > 0) { 831 pfm_reserve_page(addr); 832 addr+=PAGE_SIZE; 833 size-=PAGE_SIZE; 834 } 835 } 836 return mem; 837 } 838 839 static void 840 pfm_rvfree(void *mem, unsigned long size) 841 { 842 unsigned long addr; 843 844 if (mem) { 845 DPRINT(("freeing physical buffer @%p size=%lu\n", mem, size)); 846 addr = (unsigned long) mem; 847 while ((long) size > 0) { 848 pfm_unreserve_page(addr); 849 addr+=PAGE_SIZE; 850 size-=PAGE_SIZE; 851 } 852 vfree(mem); 853 } 854 return; 855 } 856 857 static pfm_context_t * 858 pfm_context_alloc(int ctx_flags) 859 { 860 pfm_context_t *ctx; 861 862 /* 863 * allocate context descriptor 864 * must be able to free with interrupts disabled 865 */ 866 ctx = kzalloc(sizeof(pfm_context_t), GFP_KERNEL); 867 if (ctx) { 868 DPRINT(("alloc ctx @%p\n", ctx)); 869 870 /* 871 * init context protection lock 872 */ 873 spin_lock_init(&ctx->ctx_lock); 874 875 /* 876 * context is unloaded 877 */ 878 ctx->ctx_state = PFM_CTX_UNLOADED; 879 880 /* 881 * initialization of context's flags 882 */ 883 ctx->ctx_fl_block = (ctx_flags & PFM_FL_NOTIFY_BLOCK) ? 1 : 0; 884 ctx->ctx_fl_system = (ctx_flags & PFM_FL_SYSTEM_WIDE) ? 1: 0; 885 ctx->ctx_fl_no_msg = (ctx_flags & PFM_FL_OVFL_NO_MSG) ? 1: 0; 886 /* 887 * will move to set properties 888 * ctx->ctx_fl_excl_idle = (ctx_flags & PFM_FL_EXCL_IDLE) ? 
1: 0; 889 */ 890 891 /* 892 * init restart semaphore to locked 893 */ 894 init_completion(&ctx->ctx_restart_done); 895 896 /* 897 * activation is used in SMP only 898 */ 899 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 900 SET_LAST_CPU(ctx, -1); 901 902 /* 903 * initialize notification message queue 904 */ 905 ctx->ctx_msgq_head = ctx->ctx_msgq_tail = 0; 906 init_waitqueue_head(&ctx->ctx_msgq_wait); 907 init_waitqueue_head(&ctx->ctx_zombieq); 908 909 } 910 return ctx; 911 } 912 913 static void 914 pfm_context_free(pfm_context_t *ctx) 915 { 916 if (ctx) { 917 DPRINT(("free ctx @%p\n", ctx)); 918 kfree(ctx); 919 } 920 } 921 922 static void 923 pfm_mask_monitoring(struct task_struct *task) 924 { 925 pfm_context_t *ctx = PFM_GET_CTX(task); 926 unsigned long mask, val, ovfl_mask; 927 int i; 928 929 DPRINT_ovfl(("masking monitoring for [%d]\n", task_pid_nr(task))); 930 931 ovfl_mask = pmu_conf->ovfl_val; 932 /* 933 * monitoring can only be masked as a result of a valid 934 * counter overflow. In UP, it means that the PMU still 935 * has an owner. Note that the owner can be different 936 * from the current task. However the PMU state belongs 937 * to the owner. 938 * In SMP, a valid overflow only happens when task is 939 * current. Therefore if we come here, we know that 940 * the PMU state belongs to the current task, therefore 941 * we can access the live registers. 942 * 943 * So in both cases, the live register contains the owner's 944 * state. We can ONLY touch the PMU registers and NOT the PSR. 945 * 946 * As a consequence to this call, the ctx->th_pmds[] array 947 * contains stale information which must be ignored 948 * when context is reloaded AND monitoring is active (see 949 * pfm_restart). 950 */ 951 mask = ctx->ctx_used_pmds[0]; 952 for (i = 0; mask; i++, mask>>=1) { 953 /* skip non used pmds */ 954 if ((mask & 0x1) == 0) continue; 955 val = ia64_get_pmd(i); 956 957 if (PMD_IS_COUNTING(i)) { 958 /* 959 * we rebuild the full 64 bit value of the counter 960 */ 961 ctx->ctx_pmds[i].val += (val & ovfl_mask); 962 } else { 963 ctx->ctx_pmds[i].val = val; 964 } 965 DPRINT_ovfl(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 966 i, 967 ctx->ctx_pmds[i].val, 968 val & ovfl_mask)); 969 } 970 /* 971 * mask monitoring by setting the privilege level to 0 972 * we cannot use psr.pp/psr.up for this, it is controlled by 973 * the user 974 * 975 * if task is current, modify actual registers, otherwise modify 976 * thread save state, i.e., what will be restored in pfm_load_regs() 977 */ 978 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 979 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 980 if ((mask & 0x1) == 0UL) continue; 981 ia64_set_pmc(i, ctx->th_pmcs[i] & ~0xfUL); 982 ctx->th_pmcs[i] &= ~0xfUL; 983 DPRINT_ovfl(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 984 } 985 /* 986 * make all of this visible 987 */ 988 ia64_srlz_d(); 989 } 990 991 /* 992 * must always be done with task == current 993 * 994 * context must be in MASKED state when calling 995 */ 996 static void 997 pfm_restore_monitoring(struct task_struct *task) 998 { 999 pfm_context_t *ctx = PFM_GET_CTX(task); 1000 unsigned long mask, ovfl_mask; 1001 unsigned long psr, val; 1002 int i, is_system; 1003 1004 is_system = ctx->ctx_fl_system; 1005 ovfl_mask = pmu_conf->ovfl_val; 1006 1007 if (task != current) { 1008 printk(KERN_ERR "perfmon.%d: invalid task[%d] current[%d]\n", __LINE__, task_pid_nr(task), task_pid_nr(current)); 1009 return; 1010 } 1011 if (ctx->ctx_state != PFM_CTX_MASKED) { 1012 printk(KERN_ERR "perfmon.%d: task[%d] current[%d] 
invalid state=%d\n", __LINE__, 1013 task_pid_nr(task), task_pid_nr(current), ctx->ctx_state); 1014 return; 1015 } 1016 psr = pfm_get_psr(); 1017 /* 1018 * monitoring is masked via the PMC. 1019 * As we restore their value, we do not want each counter to 1020 * restart right away. We stop monitoring using the PSR, 1021 * restore the PMC (and PMD) and then re-establish the psr 1022 * as it was. Note that there can be no pending overflow at 1023 * this point, because monitoring was MASKED. 1024 * 1025 * system-wide session are pinned and self-monitoring 1026 */ 1027 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1028 /* disable dcr pp */ 1029 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 1030 pfm_clear_psr_pp(); 1031 } else { 1032 pfm_clear_psr_up(); 1033 } 1034 /* 1035 * first, we restore the PMD 1036 */ 1037 mask = ctx->ctx_used_pmds[0]; 1038 for (i = 0; mask; i++, mask>>=1) { 1039 /* skip non used pmds */ 1040 if ((mask & 0x1) == 0) continue; 1041 1042 if (PMD_IS_COUNTING(i)) { 1043 /* 1044 * we split the 64bit value according to 1045 * counter width 1046 */ 1047 val = ctx->ctx_pmds[i].val & ovfl_mask; 1048 ctx->ctx_pmds[i].val &= ~ovfl_mask; 1049 } else { 1050 val = ctx->ctx_pmds[i].val; 1051 } 1052 ia64_set_pmd(i, val); 1053 1054 DPRINT(("pmd[%d]=0x%lx hw_pmd=0x%lx\n", 1055 i, 1056 ctx->ctx_pmds[i].val, 1057 val)); 1058 } 1059 /* 1060 * restore the PMCs 1061 */ 1062 mask = ctx->ctx_used_monitors[0] >> PMU_FIRST_COUNTER; 1063 for(i= PMU_FIRST_COUNTER; mask; i++, mask>>=1) { 1064 if ((mask & 0x1) == 0UL) continue; 1065 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1066 ia64_set_pmc(i, ctx->th_pmcs[i]); 1067 DPRINT(("[%d] pmc[%d]=0x%lx\n", 1068 task_pid_nr(task), i, ctx->th_pmcs[i])); 1069 } 1070 ia64_srlz_d(); 1071 1072 /* 1073 * must restore DBR/IBR because could be modified while masked 1074 * XXX: need to optimize 1075 */ 1076 if (ctx->ctx_fl_using_dbreg) { 1077 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 1078 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 1079 } 1080 1081 /* 1082 * now restore PSR 1083 */ 1084 if (is_system && (PFM_CPUINFO_GET() & PFM_CPUINFO_DCR_PP)) { 1085 /* enable dcr pp */ 1086 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 1087 ia64_srlz_i(); 1088 } 1089 pfm_set_psr_l(psr); 1090 } 1091 1092 static inline void 1093 pfm_save_pmds(unsigned long *pmds, unsigned long mask) 1094 { 1095 int i; 1096 1097 ia64_srlz_d(); 1098 1099 for (i=0; mask; i++, mask>>=1) { 1100 if (mask & 0x1) pmds[i] = ia64_get_pmd(i); 1101 } 1102 } 1103 1104 /* 1105 * reload from thread state (used for ctxw only) 1106 */ 1107 static inline void 1108 pfm_restore_pmds(unsigned long *pmds, unsigned long mask) 1109 { 1110 int i; 1111 unsigned long val, ovfl_val = pmu_conf->ovfl_val; 1112 1113 for (i=0; mask; i++, mask>>=1) { 1114 if ((mask & 0x1) == 0) continue; 1115 val = PMD_IS_COUNTING(i) ? 
pmds[i] & ovfl_val : pmds[i]; 1116 ia64_set_pmd(i, val); 1117 } 1118 ia64_srlz_d(); 1119 } 1120 1121 /* 1122 * propagate PMD from context to thread-state 1123 */ 1124 static inline void 1125 pfm_copy_pmds(struct task_struct *task, pfm_context_t *ctx) 1126 { 1127 unsigned long ovfl_val = pmu_conf->ovfl_val; 1128 unsigned long mask = ctx->ctx_all_pmds[0]; 1129 unsigned long val; 1130 int i; 1131 1132 DPRINT(("mask=0x%lx\n", mask)); 1133 1134 for (i=0; mask; i++, mask>>=1) { 1135 1136 val = ctx->ctx_pmds[i].val; 1137 1138 /* 1139 * We break up the 64 bit value into 2 pieces 1140 * the lower bits go to the machine state in the 1141 * thread (will be reloaded on ctxsw in). 1142 * The upper part stays in the soft-counter. 1143 */ 1144 if (PMD_IS_COUNTING(i)) { 1145 ctx->ctx_pmds[i].val = val & ~ovfl_val; 1146 val &= ovfl_val; 1147 } 1148 ctx->th_pmds[i] = val; 1149 1150 DPRINT(("pmd[%d]=0x%lx soft_val=0x%lx\n", 1151 i, 1152 ctx->th_pmds[i], 1153 ctx->ctx_pmds[i].val)); 1154 } 1155 } 1156 1157 /* 1158 * propagate PMC from context to thread-state 1159 */ 1160 static inline void 1161 pfm_copy_pmcs(struct task_struct *task, pfm_context_t *ctx) 1162 { 1163 unsigned long mask = ctx->ctx_all_pmcs[0]; 1164 int i; 1165 1166 DPRINT(("mask=0x%lx\n", mask)); 1167 1168 for (i=0; mask; i++, mask>>=1) { 1169 /* masking 0 with ovfl_val yields 0 */ 1170 ctx->th_pmcs[i] = ctx->ctx_pmcs[i]; 1171 DPRINT(("pmc[%d]=0x%lx\n", i, ctx->th_pmcs[i])); 1172 } 1173 } 1174 1175 1176 1177 static inline void 1178 pfm_restore_pmcs(unsigned long *pmcs, unsigned long mask) 1179 { 1180 int i; 1181 1182 for (i=0; mask; i++, mask>>=1) { 1183 if ((mask & 0x1) == 0) continue; 1184 ia64_set_pmc(i, pmcs[i]); 1185 } 1186 ia64_srlz_d(); 1187 } 1188 1189 static inline int 1190 pfm_uuid_cmp(pfm_uuid_t a, pfm_uuid_t b) 1191 { 1192 return memcmp(a, b, sizeof(pfm_uuid_t)); 1193 } 1194 1195 static inline int 1196 pfm_buf_fmt_exit(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, struct pt_regs *regs) 1197 { 1198 int ret = 0; 1199 if (fmt->fmt_exit) ret = (*fmt->fmt_exit)(task, buf, regs); 1200 return ret; 1201 } 1202 1203 static inline int 1204 pfm_buf_fmt_getsize(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, int cpu, void *arg, unsigned long *size) 1205 { 1206 int ret = 0; 1207 if (fmt->fmt_getsize) ret = (*fmt->fmt_getsize)(task, flags, cpu, arg, size); 1208 return ret; 1209 } 1210 1211 1212 static inline int 1213 pfm_buf_fmt_validate(pfm_buffer_fmt_t *fmt, struct task_struct *task, unsigned int flags, 1214 int cpu, void *arg) 1215 { 1216 int ret = 0; 1217 if (fmt->fmt_validate) ret = (*fmt->fmt_validate)(task, flags, cpu, arg); 1218 return ret; 1219 } 1220 1221 static inline int 1222 pfm_buf_fmt_init(pfm_buffer_fmt_t *fmt, struct task_struct *task, void *buf, unsigned int flags, 1223 int cpu, void *arg) 1224 { 1225 int ret = 0; 1226 if (fmt->fmt_init) ret = (*fmt->fmt_init)(task, buf, flags, cpu, arg); 1227 return ret; 1228 } 1229 1230 static inline int 1231 pfm_buf_fmt_restart(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1232 { 1233 int ret = 0; 1234 if (fmt->fmt_restart) ret = (*fmt->fmt_restart)(task, ctrl, buf, regs); 1235 return ret; 1236 } 1237 1238 static inline int 1239 pfm_buf_fmt_restart_active(pfm_buffer_fmt_t *fmt, struct task_struct *task, pfm_ovfl_ctrl_t *ctrl, void *buf, struct pt_regs *regs) 1240 { 1241 int ret = 0; 1242 if (fmt->fmt_restart_active) ret = (*fmt->fmt_restart_active)(task, ctrl, buf, regs); 1243 return ret; 1244 } 1245 
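/*
 * Illustrative sketch, not part of the original file: the minimal shape of
 * a sampling buffer format module using the registration API below. The
 * format name, UUID and handler shown here are made up; only fmt_name,
 * fmt_uuid and fmt_handler are strictly required by
 * pfm_register_buffer_fmt(), which also rejects duplicate UUIDs (-EBUSY).
 * The callback prototypes (fmt_handler and friends) come from the
 * pfm_buffer_fmt_t declaration in the perfmon headers and are omitted here.
 */
#if 0
static pfm_buffer_fmt_t example_fmt = {
	.fmt_name    = "example-sampling-format",
	.fmt_uuid    = { 0x11, 0x22, 0x33, 0x44 /* , ... 16-byte UUID */ },
	.fmt_handler = example_ovfl_handler,	/* invoked on counter overflow */
	/* optional: .fmt_validate, .fmt_getsize, .fmt_init, .fmt_restart, .fmt_exit */
};

static int __init example_fmt_init(void)
{
	return pfm_register_buffer_fmt(&example_fmt);
}

static void __exit example_fmt_cleanup(void)
{
	pfm_unregister_buffer_fmt(example_fmt.fmt_uuid);
}
#endif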
1246 static pfm_buffer_fmt_t * 1247 __pfm_find_buffer_fmt(pfm_uuid_t uuid) 1248 { 1249 struct list_head * pos; 1250 pfm_buffer_fmt_t * entry; 1251 1252 list_for_each(pos, &pfm_buffer_fmt_list) { 1253 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 1254 if (pfm_uuid_cmp(uuid, entry->fmt_uuid) == 0) 1255 return entry; 1256 } 1257 return NULL; 1258 } 1259 1260 /* 1261 * find a buffer format based on its uuid 1262 */ 1263 static pfm_buffer_fmt_t * 1264 pfm_find_buffer_fmt(pfm_uuid_t uuid) 1265 { 1266 pfm_buffer_fmt_t * fmt; 1267 spin_lock(&pfm_buffer_fmt_lock); 1268 fmt = __pfm_find_buffer_fmt(uuid); 1269 spin_unlock(&pfm_buffer_fmt_lock); 1270 return fmt; 1271 } 1272 1273 int 1274 pfm_register_buffer_fmt(pfm_buffer_fmt_t *fmt) 1275 { 1276 int ret = 0; 1277 1278 /* some sanity checks */ 1279 if (fmt == NULL || fmt->fmt_name == NULL) return -EINVAL; 1280 1281 /* we need at least a handler */ 1282 if (fmt->fmt_handler == NULL) return -EINVAL; 1283 1284 /* 1285 * XXX: need check validity of fmt_arg_size 1286 */ 1287 1288 spin_lock(&pfm_buffer_fmt_lock); 1289 1290 if (__pfm_find_buffer_fmt(fmt->fmt_uuid)) { 1291 printk(KERN_ERR "perfmon: duplicate sampling format: %s\n", fmt->fmt_name); 1292 ret = -EBUSY; 1293 goto out; 1294 } 1295 list_add(&fmt->fmt_list, &pfm_buffer_fmt_list); 1296 printk(KERN_INFO "perfmon: added sampling format %s\n", fmt->fmt_name); 1297 1298 out: 1299 spin_unlock(&pfm_buffer_fmt_lock); 1300 return ret; 1301 } 1302 EXPORT_SYMBOL(pfm_register_buffer_fmt); 1303 1304 int 1305 pfm_unregister_buffer_fmt(pfm_uuid_t uuid) 1306 { 1307 pfm_buffer_fmt_t *fmt; 1308 int ret = 0; 1309 1310 spin_lock(&pfm_buffer_fmt_lock); 1311 1312 fmt = __pfm_find_buffer_fmt(uuid); 1313 if (!fmt) { 1314 printk(KERN_ERR "perfmon: cannot unregister format, not found\n"); 1315 ret = -EINVAL; 1316 goto out; 1317 } 1318 list_del_init(&fmt->fmt_list); 1319 printk(KERN_INFO "perfmon: removed sampling format: %s\n", fmt->fmt_name); 1320 1321 out: 1322 spin_unlock(&pfm_buffer_fmt_lock); 1323 return ret; 1324 1325 } 1326 EXPORT_SYMBOL(pfm_unregister_buffer_fmt); 1327 1328 static int 1329 pfm_reserve_session(struct task_struct *task, int is_syswide, unsigned int cpu) 1330 { 1331 unsigned long flags; 1332 /* 1333 * validity checks on cpu_mask have been done upstream 1334 */ 1335 LOCK_PFS(flags); 1336 1337 DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1338 pfm_sessions.pfs_sys_sessions, 1339 pfm_sessions.pfs_task_sessions, 1340 pfm_sessions.pfs_sys_use_dbregs, 1341 is_syswide, 1342 cpu)); 1343 1344 if (is_syswide) { 1345 /* 1346 * cannot mix system wide and per-task sessions 1347 */ 1348 if (pfm_sessions.pfs_task_sessions > 0UL) { 1349 DPRINT(("system wide not possible, %u conflicting task_sessions\n", 1350 pfm_sessions.pfs_task_sessions)); 1351 goto abort; 1352 } 1353 1354 if (pfm_sessions.pfs_sys_session[cpu]) goto error_conflict; 1355 1356 DPRINT(("reserving system wide session on CPU%u currently on CPU%u\n", cpu, smp_processor_id())); 1357 1358 pfm_sessions.pfs_sys_session[cpu] = task; 1359 1360 pfm_sessions.pfs_sys_sessions++ ; 1361 1362 } else { 1363 if (pfm_sessions.pfs_sys_sessions) goto abort; 1364 pfm_sessions.pfs_task_sessions++; 1365 } 1366 1367 DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n", 1368 pfm_sessions.pfs_sys_sessions, 1369 pfm_sessions.pfs_task_sessions, 1370 pfm_sessions.pfs_sys_use_dbregs, 1371 is_syswide, 1372 cpu)); 1373 1374 /* 1375 * Force idle() into poll mode 1376 */ 1377 cpu_idle_poll_ctrl(true); 1378 1379 
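	/*
	 * forcing the idle loop into poll mode prevents PAL halt for as
	 * long as at least one session is reserved; the matching
	 * cpu_idle_poll_ctrl(false) in pfm_unreserve_session() re-enables
	 * pal_halt once the last session is released.
	 */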
	UNLOCK_PFS(flags);

	return 0;

error_conflict:
	DPRINT(("system wide not possible, conflicting session [%d] on CPU%d\n",
		task_pid_nr(pfm_sessions.pfs_sys_session[cpu]),
		cpu));
abort:
	UNLOCK_PFS(flags);

	return -EBUSY;

}

static int
pfm_unreserve_session(pfm_context_t *ctx, int is_syswide, unsigned int cpu)
{
	unsigned long flags;
	/*
	 * validity checks on cpu_mask have been done upstream
	 */
	LOCK_PFS(flags);

	DPRINT(("in sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));


	if (is_syswide) {
		pfm_sessions.pfs_sys_session[cpu] = NULL;
		/*
		 * would not work with perfmon+more than one bit in cpu_mask
		 */
		if (ctx && ctx->ctx_fl_using_dbreg) {
			if (pfm_sessions.pfs_sys_use_dbregs == 0) {
				printk(KERN_ERR "perfmon: invalid release for ctx %p sys_use_dbregs=0\n", ctx);
			} else {
				pfm_sessions.pfs_sys_use_dbregs--;
			}
		}
		pfm_sessions.pfs_sys_sessions--;
	} else {
		pfm_sessions.pfs_task_sessions--;
	}
	DPRINT(("out sys_sessions=%u task_sessions=%u dbregs=%u syswide=%d cpu=%u\n",
		pfm_sessions.pfs_sys_sessions,
		pfm_sessions.pfs_task_sessions,
		pfm_sessions.pfs_sys_use_dbregs,
		is_syswide,
		cpu));

	/* Undo forced polling. Last session reenables pal_halt */
	cpu_idle_poll_ctrl(false);

	UNLOCK_PFS(flags);

	return 0;
}

/*
 * removes virtual mapping of the sampling buffer.
 * IMPORTANT: cannot be called with interrupts disabled, e.g. inside
 * a PROTECT_CTX() section.
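 * The unmap is done with vm_munmap(), which takes the mmap semaphore and
 * may sleep.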
1446 */ 1447 static int 1448 pfm_remove_smpl_mapping(void *vaddr, unsigned long size) 1449 { 1450 struct task_struct *task = current; 1451 int r; 1452 1453 /* sanity checks */ 1454 if (task->mm == NULL || size == 0UL || vaddr == NULL) { 1455 printk(KERN_ERR "perfmon: pfm_remove_smpl_mapping [%d] invalid context mm=%p\n", task_pid_nr(task), task->mm); 1456 return -EINVAL; 1457 } 1458 1459 DPRINT(("smpl_vaddr=%p size=%lu\n", vaddr, size)); 1460 1461 /* 1462 * does the actual unmapping 1463 */ 1464 r = vm_munmap((unsigned long)vaddr, size); 1465 1466 if (r !=0) { 1467 printk(KERN_ERR "perfmon: [%d] unable to unmap sampling buffer @%p size=%lu\n", task_pid_nr(task), vaddr, size); 1468 } 1469 1470 DPRINT(("do_unmap(%p, %lu)=%d\n", vaddr, size, r)); 1471 1472 return 0; 1473 } 1474 1475 /* 1476 * free actual physical storage used by sampling buffer 1477 */ 1478 #if 0 1479 static int 1480 pfm_free_smpl_buffer(pfm_context_t *ctx) 1481 { 1482 pfm_buffer_fmt_t *fmt; 1483 1484 if (ctx->ctx_smpl_hdr == NULL) goto invalid_free; 1485 1486 /* 1487 * we won't use the buffer format anymore 1488 */ 1489 fmt = ctx->ctx_buf_fmt; 1490 1491 DPRINT(("sampling buffer @%p size %lu vaddr=%p\n", 1492 ctx->ctx_smpl_hdr, 1493 ctx->ctx_smpl_size, 1494 ctx->ctx_smpl_vaddr)); 1495 1496 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1497 1498 /* 1499 * free the buffer 1500 */ 1501 pfm_rvfree(ctx->ctx_smpl_hdr, ctx->ctx_smpl_size); 1502 1503 ctx->ctx_smpl_hdr = NULL; 1504 ctx->ctx_smpl_size = 0UL; 1505 1506 return 0; 1507 1508 invalid_free: 1509 printk(KERN_ERR "perfmon: pfm_free_smpl_buffer [%d] no buffer\n", task_pid_nr(current)); 1510 return -EINVAL; 1511 } 1512 #endif 1513 1514 static inline void 1515 pfm_exit_smpl_buffer(pfm_buffer_fmt_t *fmt) 1516 { 1517 if (fmt == NULL) return; 1518 1519 pfm_buf_fmt_exit(fmt, current, NULL, NULL); 1520 1521 } 1522 1523 /* 1524 * pfmfs should _never_ be mounted by userland - too much of security hassle, 1525 * no real gain from having the whole whorehouse mounted. So we don't need 1526 * any operations on the root directory. However, we need a non-trivial 1527 * d_name - pfm: will go nicely and kill the special-casing in procfs. 
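 * The filesystem is kern_mount()ed internally by init_pfm_fs() below and is
 * only used to provide anonymous inodes backing perfmon file descriptors.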
1528 */ 1529 static struct vfsmount *pfmfs_mnt __read_mostly; 1530 1531 static int __init 1532 init_pfm_fs(void) 1533 { 1534 int err = register_filesystem(&pfm_fs_type); 1535 if (!err) { 1536 pfmfs_mnt = kern_mount(&pfm_fs_type); 1537 err = PTR_ERR(pfmfs_mnt); 1538 if (IS_ERR(pfmfs_mnt)) 1539 unregister_filesystem(&pfm_fs_type); 1540 else 1541 err = 0; 1542 } 1543 return err; 1544 } 1545 1546 static ssize_t 1547 pfm_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos) 1548 { 1549 pfm_context_t *ctx; 1550 pfm_msg_t *msg; 1551 ssize_t ret; 1552 unsigned long flags; 1553 DECLARE_WAITQUEUE(wait, current); 1554 if (PFM_IS_FILE(filp) == 0) { 1555 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1556 return -EINVAL; 1557 } 1558 1559 ctx = filp->private_data; 1560 if (ctx == NULL) { 1561 printk(KERN_ERR "perfmon: pfm_read: NULL ctx [%d]\n", task_pid_nr(current)); 1562 return -EINVAL; 1563 } 1564 1565 /* 1566 * check even when there is no message 1567 */ 1568 if (size < sizeof(pfm_msg_t)) { 1569 DPRINT(("message is too small ctx=%p (>=%ld)\n", ctx, sizeof(pfm_msg_t))); 1570 return -EINVAL; 1571 } 1572 1573 PROTECT_CTX(ctx, flags); 1574 1575 /* 1576 * put ourselves on the wait queue 1577 */ 1578 add_wait_queue(&ctx->ctx_msgq_wait, &wait); 1579 1580 1581 for(;;) { 1582 /* 1583 * check wait queue 1584 */ 1585 1586 set_current_state(TASK_INTERRUPTIBLE); 1587 1588 DPRINT(("head=%d tail=%d\n", ctx->ctx_msgq_head, ctx->ctx_msgq_tail)); 1589 1590 ret = 0; 1591 if(PFM_CTXQ_EMPTY(ctx) == 0) break; 1592 1593 UNPROTECT_CTX(ctx, flags); 1594 1595 /* 1596 * check non-blocking read 1597 */ 1598 ret = -EAGAIN; 1599 if(filp->f_flags & O_NONBLOCK) break; 1600 1601 /* 1602 * check pending signals 1603 */ 1604 if(signal_pending(current)) { 1605 ret = -EINTR; 1606 break; 1607 } 1608 /* 1609 * no message, so wait 1610 */ 1611 schedule(); 1612 1613 PROTECT_CTX(ctx, flags); 1614 } 1615 DPRINT(("[%d] back to running ret=%ld\n", task_pid_nr(current), ret)); 1616 set_current_state(TASK_RUNNING); 1617 remove_wait_queue(&ctx->ctx_msgq_wait, &wait); 1618 1619 if (ret < 0) goto abort; 1620 1621 ret = -EINVAL; 1622 msg = pfm_get_next_msg(ctx); 1623 if (msg == NULL) { 1624 printk(KERN_ERR "perfmon: pfm_read no msg for ctx=%p [%d]\n", ctx, task_pid_nr(current)); 1625 goto abort_locked; 1626 } 1627 1628 DPRINT(("fd=%d type=%d\n", msg->pfm_gen_msg.msg_ctx_fd, msg->pfm_gen_msg.msg_type)); 1629 1630 ret = -EFAULT; 1631 if(copy_to_user(buf, msg, sizeof(pfm_msg_t)) == 0) ret = sizeof(pfm_msg_t); 1632 1633 abort_locked: 1634 UNPROTECT_CTX(ctx, flags); 1635 abort: 1636 return ret; 1637 } 1638 1639 static ssize_t 1640 pfm_write(struct file *file, const char __user *ubuf, 1641 size_t size, loff_t *ppos) 1642 { 1643 DPRINT(("pfm_write called\n")); 1644 return -EINVAL; 1645 } 1646 1647 static __poll_t 1648 pfm_poll(struct file *filp, poll_table * wait) 1649 { 1650 pfm_context_t *ctx; 1651 unsigned long flags; 1652 __poll_t mask = 0; 1653 1654 if (PFM_IS_FILE(filp) == 0) { 1655 printk(KERN_ERR "perfmon: pfm_poll: bad magic [%d]\n", task_pid_nr(current)); 1656 return 0; 1657 } 1658 1659 ctx = filp->private_data; 1660 if (ctx == NULL) { 1661 printk(KERN_ERR "perfmon: pfm_poll: NULL ctx [%d]\n", task_pid_nr(current)); 1662 return 0; 1663 } 1664 1665 1666 DPRINT(("pfm_poll ctx_fd=%d before poll_wait\n", ctx->ctx_fd)); 1667 1668 poll_wait(filp, &ctx->ctx_msgq_wait, wait); 1669 1670 PROTECT_CTX(ctx, flags); 1671 1672 if (PFM_CTXQ_EMPTY(ctx) == 0) 1673 mask = EPOLLIN | EPOLLRDNORM; 1674 1675 
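	/* a queued overflow notification message makes this fd readable */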
UNPROTECT_CTX(ctx, flags); 1676 1677 DPRINT(("pfm_poll ctx_fd=%d mask=0x%x\n", ctx->ctx_fd, mask)); 1678 1679 return mask; 1680 } 1681 1682 static long 1683 pfm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 1684 { 1685 DPRINT(("pfm_ioctl called\n")); 1686 return -EINVAL; 1687 } 1688 1689 /* 1690 * interrupt cannot be masked when coming here 1691 */ 1692 static inline int 1693 pfm_do_fasync(int fd, struct file *filp, pfm_context_t *ctx, int on) 1694 { 1695 int ret; 1696 1697 ret = fasync_helper (fd, filp, on, &ctx->ctx_async_queue); 1698 1699 DPRINT(("pfm_fasync called by [%d] on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1700 task_pid_nr(current), 1701 fd, 1702 on, 1703 ctx->ctx_async_queue, ret)); 1704 1705 return ret; 1706 } 1707 1708 static int 1709 pfm_fasync(int fd, struct file *filp, int on) 1710 { 1711 pfm_context_t *ctx; 1712 int ret; 1713 1714 if (PFM_IS_FILE(filp) == 0) { 1715 printk(KERN_ERR "perfmon: pfm_fasync bad magic [%d]\n", task_pid_nr(current)); 1716 return -EBADF; 1717 } 1718 1719 ctx = filp->private_data; 1720 if (ctx == NULL) { 1721 printk(KERN_ERR "perfmon: pfm_fasync NULL ctx [%d]\n", task_pid_nr(current)); 1722 return -EBADF; 1723 } 1724 /* 1725 * we cannot mask interrupts during this call because this may 1726 * may go to sleep if memory is not readily avalaible. 1727 * 1728 * We are protected from the conetxt disappearing by the get_fd()/put_fd() 1729 * done in caller. Serialization of this function is ensured by caller. 1730 */ 1731 ret = pfm_do_fasync(fd, filp, ctx, on); 1732 1733 1734 DPRINT(("pfm_fasync called on ctx_fd=%d on=%d async_queue=%p ret=%d\n", 1735 fd, 1736 on, 1737 ctx->ctx_async_queue, ret)); 1738 1739 return ret; 1740 } 1741 1742 #ifdef CONFIG_SMP 1743 /* 1744 * this function is exclusively called from pfm_close(). 1745 * The context is not protected at that time, nor are interrupts 1746 * on the remote CPU. That's necessary to avoid deadlocks. 
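 * Interrupts are masked locally (local_irq_save) inside the handler itself,
 * around the call to pfm_context_unload().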
1747 */ 1748 static void 1749 pfm_syswide_force_stop(void *info) 1750 { 1751 pfm_context_t *ctx = (pfm_context_t *)info; 1752 struct pt_regs *regs = task_pt_regs(current); 1753 struct task_struct *owner; 1754 unsigned long flags; 1755 int ret; 1756 1757 if (ctx->ctx_cpu != smp_processor_id()) { 1758 printk(KERN_ERR "perfmon: pfm_syswide_force_stop for CPU%d but on CPU%d\n", 1759 ctx->ctx_cpu, 1760 smp_processor_id()); 1761 return; 1762 } 1763 owner = GET_PMU_OWNER(); 1764 if (owner != ctx->ctx_task) { 1765 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected owner [%d] instead of [%d]\n", 1766 smp_processor_id(), 1767 task_pid_nr(owner), task_pid_nr(ctx->ctx_task)); 1768 return; 1769 } 1770 if (GET_PMU_CTX() != ctx) { 1771 printk(KERN_ERR "perfmon: pfm_syswide_force_stop CPU%d unexpected ctx %p instead of %p\n", 1772 smp_processor_id(), 1773 GET_PMU_CTX(), ctx); 1774 return; 1775 } 1776 1777 DPRINT(("on CPU%d forcing system wide stop for [%d]\n", smp_processor_id(), task_pid_nr(ctx->ctx_task))); 1778 /* 1779 * the context is already protected in pfm_close(), we simply 1780 * need to mask interrupts to avoid a PMU interrupt race on 1781 * this CPU 1782 */ 1783 local_irq_save(flags); 1784 1785 ret = pfm_context_unload(ctx, NULL, 0, regs); 1786 if (ret) { 1787 DPRINT(("context_unload returned %d\n", ret)); 1788 } 1789 1790 /* 1791 * unmask interrupts, PMU interrupts are now spurious here 1792 */ 1793 local_irq_restore(flags); 1794 } 1795 1796 static void 1797 pfm_syswide_cleanup_other_cpu(pfm_context_t *ctx) 1798 { 1799 int ret; 1800 1801 DPRINT(("calling CPU%d for cleanup\n", ctx->ctx_cpu)); 1802 ret = smp_call_function_single(ctx->ctx_cpu, pfm_syswide_force_stop, ctx, 1); 1803 DPRINT(("called CPU%d for cleanup ret=%d\n", ctx->ctx_cpu, ret)); 1804 } 1805 #endif /* CONFIG_SMP */ 1806 1807 /* 1808 * called for each close(). Partially free resources. 1809 * When caller is self-monitoring, the context is unloaded. 1810 */ 1811 static int 1812 pfm_flush(struct file *filp, fl_owner_t id) 1813 { 1814 pfm_context_t *ctx; 1815 struct task_struct *task; 1816 struct pt_regs *regs; 1817 unsigned long flags; 1818 unsigned long smpl_buf_size = 0UL; 1819 void *smpl_buf_vaddr = NULL; 1820 int state, is_system; 1821 1822 if (PFM_IS_FILE(filp) == 0) { 1823 DPRINT(("bad magic for\n")); 1824 return -EBADF; 1825 } 1826 1827 ctx = filp->private_data; 1828 if (ctx == NULL) { 1829 printk(KERN_ERR "perfmon: pfm_flush: NULL ctx [%d]\n", task_pid_nr(current)); 1830 return -EBADF; 1831 } 1832 1833 /* 1834 * remove our file from the async queue, if we use this mode. 1835 * This can be done without the context being protected. We come 1836 * here when the context has become unreachable by other tasks. 1837 * 1838 * We may still have active monitoring at this point and we may 1839 * end up in pfm_overflow_handler(). However, fasync_helper() 1840 * operates with interrupts disabled and it cleans up the 1841 * queue. If the PMU handler is called prior to entering 1842 * fasync_helper() then it will send a signal. If it is 1843 * invoked after, it will find an empty queue and no 1844 * signal will be sent. In both case, we are safe 1845 */ 1846 PROTECT_CTX(ctx, flags); 1847 1848 state = ctx->ctx_state; 1849 is_system = ctx->ctx_fl_system; 1850 1851 task = PFM_CTX_TASK(ctx); 1852 regs = task_pt_regs(task); 1853 1854 DPRINT(("ctx_state=%d is_current=%d\n", 1855 state, 1856 task == current ? 
1 : 0)); 1857 1858 /* 1859 * if state == UNLOADED, then task is NULL 1860 */ 1861 1862 /* 1863 * we must stop and unload because we are losing access to the context. 1864 */ 1865 if (task == current) { 1866 #ifdef CONFIG_SMP 1867 /* 1868 * the task IS the owner but it migrated to another CPU: that's bad 1869 * but we must handle this cleanly. Unfortunately, the kernel does 1870 * not provide a mechanism to block migration (while the context is loaded). 1871 * 1872 * We need to release the resource on the ORIGINAL cpu. 1873 */ 1874 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 1875 1876 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 1877 /* 1878 * keep context protected but unmask interrupt for IPI 1879 */ 1880 local_irq_restore(flags); 1881 1882 pfm_syswide_cleanup_other_cpu(ctx); 1883 1884 /* 1885 * restore interrupt masking 1886 */ 1887 local_irq_save(flags); 1888 1889 /* 1890 * context is unloaded at this point 1891 */ 1892 } else 1893 #endif /* CONFIG_SMP */ 1894 { 1895 1896 DPRINT(("forcing unload\n")); 1897 /* 1898 * stop and unload, returning with state UNLOADED 1899 * and session unreserved. 1900 */ 1901 pfm_context_unload(ctx, NULL, 0, regs); 1902 1903 DPRINT(("ctx_state=%d\n", ctx->ctx_state)); 1904 } 1905 } 1906 1907 /* 1908 * remove virtual mapping, if any, for the calling task. 1909 * cannot reset ctx field until last user is calling close(). 1910 * 1911 * ctx_smpl_vaddr must never be cleared because it is needed 1912 * by every task with access to the context 1913 * 1914 * When called from do_exit(), the mm context is gone already, therefore 1915 * mm is NULL, i.e., the VMA is already gone and we do not have to 1916 * do anything here 1917 */ 1918 if (ctx->ctx_smpl_vaddr && current->mm) { 1919 smpl_buf_vaddr = ctx->ctx_smpl_vaddr; 1920 smpl_buf_size = ctx->ctx_smpl_size; 1921 } 1922 1923 UNPROTECT_CTX(ctx, flags); 1924 1925 /* 1926 * if there was a mapping, then we systematically remove it 1927 * at this point. Cannot be done inside critical section 1928 * because some VM function reenables interrupts. 1929 * 1930 */ 1931 if (smpl_buf_vaddr) pfm_remove_smpl_mapping(smpl_buf_vaddr, smpl_buf_size); 1932 1933 return 0; 1934 } 1935 /* 1936 * called either on explicit close() or from exit_files(). 1937 * Only the LAST user of the file gets to this point, i.e., it is 1938 * called only ONCE. 1939 * 1940 * IMPORTANT: we get called ONLY when the refcnt on the file gets to zero 1941 * (fput()),i.e, last task to access the file. Nobody else can access the 1942 * file at this point. 1943 * 1944 * When called from exit_files(), the VMA has been freed because exit_mm() 1945 * is executed before exit_files(). 1946 * 1947 * When called from exit_files(), the current task is not yet ZOMBIE but we 1948 * flush the PMU state to the context. 
1949 */ 1950 static int 1951 pfm_close(struct inode *inode, struct file *filp) 1952 { 1953 pfm_context_t *ctx; 1954 struct task_struct *task; 1955 struct pt_regs *regs; 1956 DECLARE_WAITQUEUE(wait, current); 1957 unsigned long flags; 1958 unsigned long smpl_buf_size = 0UL; 1959 void *smpl_buf_addr = NULL; 1960 int free_possible = 1; 1961 int state, is_system; 1962 1963 DPRINT(("pfm_close called private=%p\n", filp->private_data)); 1964 1965 if (PFM_IS_FILE(filp) == 0) { 1966 DPRINT(("bad magic\n")); 1967 return -EBADF; 1968 } 1969 1970 ctx = filp->private_data; 1971 if (ctx == NULL) { 1972 printk(KERN_ERR "perfmon: pfm_close: NULL ctx [%d]\n", task_pid_nr(current)); 1973 return -EBADF; 1974 } 1975 1976 PROTECT_CTX(ctx, flags); 1977 1978 state = ctx->ctx_state; 1979 is_system = ctx->ctx_fl_system; 1980 1981 task = PFM_CTX_TASK(ctx); 1982 regs = task_pt_regs(task); 1983 1984 DPRINT(("ctx_state=%d is_current=%d\n", 1985 state, 1986 task == current ? 1 : 0)); 1987 1988 /* 1989 * if task == current, then pfm_flush() unloaded the context 1990 */ 1991 if (state == PFM_CTX_UNLOADED) goto doit; 1992 1993 /* 1994 * context is loaded/masked and task != current, we need to 1995 * either force an unload or go zombie 1996 */ 1997 1998 /* 1999 * The task is currently blocked or will block after an overflow. 2000 * we must force it to wakeup to get out of the 2001 * MASKED state and transition to the unloaded state by itself. 2002 * 2003 * This situation is only possible for per-task mode 2004 */ 2005 if (state == PFM_CTX_MASKED && CTX_OVFL_NOBLOCK(ctx) == 0) { 2006 2007 /* 2008 * set a "partial" zombie state to be checked 2009 * upon return from down() in pfm_handle_work(). 2010 * 2011 * We cannot use the ZOMBIE state, because it is checked 2012 * by pfm_load_regs() which is called upon wakeup from down(). 2013 * In such case, it would free the context and then we would 2014 * return to pfm_handle_work() which would access the 2015 * stale context. Instead, we set a flag invisible to pfm_load_regs() 2016 * but visible to pfm_handle_work(). 2017 * 2018 * For some window of time, we have a zombie context with 2019 * ctx_state = MASKED and not ZOMBIE 2020 */ 2021 ctx->ctx_fl_going_zombie = 1; 2022 2023 /* 2024 * force task to wake up from MASKED state 2025 */ 2026 complete(&ctx->ctx_restart_done); 2027 2028 DPRINT(("waking up ctx_state=%d\n", state)); 2029 2030 /* 2031 * put ourself to sleep waiting for the other 2032 * task to report completion 2033 * 2034 * the context is protected by mutex, therefore there 2035 * is no risk of being notified of completion before 2036 * begin actually on the waitq. 2037 */ 2038 set_current_state(TASK_INTERRUPTIBLE); 2039 add_wait_queue(&ctx->ctx_zombieq, &wait); 2040 2041 UNPROTECT_CTX(ctx, flags); 2042 2043 /* 2044 * XXX: check for signals : 2045 * - ok for explicit close 2046 * - not ok when coming from exit_files() 2047 */ 2048 schedule(); 2049 2050 2051 PROTECT_CTX(ctx, flags); 2052 2053 2054 remove_wait_queue(&ctx->ctx_zombieq, &wait); 2055 set_current_state(TASK_RUNNING); 2056 2057 /* 2058 * context is unloaded at this point 2059 */ 2060 DPRINT(("after zombie wakeup ctx_state=%d for\n", state)); 2061 } 2062 else if (task != current) { 2063 #ifdef CONFIG_SMP 2064 /* 2065 * switch context to zombie state 2066 */ 2067 ctx->ctx_state = PFM_CTX_ZOMBIE; 2068 2069 DPRINT(("zombie ctx for [%d]\n", task_pid_nr(task))); 2070 /* 2071 * cannot free the context on the spot. 
		 * deferred until the task notices the ZOMBIE state
		 */
		free_possible = 0;
#else
		pfm_context_unload(ctx, NULL, 0, regs);
#endif
	}

doit:
	/* reload state, may have changed during opening of critical section */
	state = ctx->ctx_state;

	/*
	 * the context is still attached to a task (possibly current)
	 * we cannot destroy it right now
	 */

	/*
	 * we must free the sampling buffer right here because
	 * we cannot rely on it being cleaned up later by the
	 * monitored task. It is not possible to free vmalloc'ed
	 * memory in pfm_load_regs(). Instead, we remove the buffer
	 * now. Should there be a subsequent PMU overflow originally
	 * meant for sampling, it will be treated as spurious, which
	 * is fine because the monitoring tool is gone anyway.
	 */
	if (ctx->ctx_smpl_hdr) {
		smpl_buf_addr = ctx->ctx_smpl_hdr;
		smpl_buf_size = ctx->ctx_smpl_size;
		/* no more sampling */
		ctx->ctx_smpl_hdr = NULL;
		ctx->ctx_fl_is_sampling = 0;
	}

	DPRINT(("ctx_state=%d free_possible=%d addr=%p size=%lu\n",
		state,
		free_possible,
		smpl_buf_addr,
		smpl_buf_size));

	if (smpl_buf_addr) pfm_exit_smpl_buffer(ctx->ctx_buf_fmt);

	/*
	 * UNLOADED means the session has already been unreserved.
	 */
	if (state == PFM_CTX_ZOMBIE) {
		pfm_unreserve_session(ctx, ctx->ctx_fl_system, ctx->ctx_cpu);
	}

	/*
	 * disconnecting the file descriptor from the context must be done
	 * before we unlock.
	 */
	filp->private_data = NULL;

	/*
	 * if we free on the spot, the context is now completely unreachable
	 * from the caller's side. The monitored task side is also disconnected,
	 * so the context can be freed safely.
	 *
	 * If we have a deferred free, only the caller side is disconnected.
	 */
	UNPROTECT_CTX(ctx, flags);

	/*
	 * All memory free operations (especially for vmalloc'ed memory)
	 * MUST be done with interrupts ENABLED.
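	 *
	 * A minimal sketch of the ordering this implies for the code below
	 * (pfm_rvfree() is assumed to be vfree()-based and may sleep):
	 *
	 *	UNPROTECT_CTX(ctx, flags);	// already done above, irqs back on
	 *	pfm_rvfree(smpl_buf_addr, smpl_buf_size);
	 *	if (free_possible) pfm_context_free(ctx);
	 *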
2139 */ 2140 if (smpl_buf_addr) pfm_rvfree(smpl_buf_addr, smpl_buf_size); 2141 2142 /* 2143 * return the memory used by the context 2144 */ 2145 if (free_possible) pfm_context_free(ctx); 2146 2147 return 0; 2148 } 2149 2150 static const struct file_operations pfm_file_ops = { 2151 .llseek = no_llseek, 2152 .read = pfm_read, 2153 .write = pfm_write, 2154 .poll = pfm_poll, 2155 .unlocked_ioctl = pfm_ioctl, 2156 .fasync = pfm_fasync, 2157 .release = pfm_close, 2158 .flush = pfm_flush 2159 }; 2160 2161 static char *pfmfs_dname(struct dentry *dentry, char *buffer, int buflen) 2162 { 2163 return dynamic_dname(dentry, buffer, buflen, "pfm:[%lu]", 2164 d_inode(dentry)->i_ino); 2165 } 2166 2167 static const struct dentry_operations pfmfs_dentry_operations = { 2168 .d_delete = always_delete_dentry, 2169 .d_dname = pfmfs_dname, 2170 }; 2171 2172 2173 static struct file * 2174 pfm_alloc_file(pfm_context_t *ctx) 2175 { 2176 struct file *file; 2177 struct inode *inode; 2178 struct path path; 2179 struct qstr this = { .name = "" }; 2180 2181 /* 2182 * allocate a new inode 2183 */ 2184 inode = new_inode(pfmfs_mnt->mnt_sb); 2185 if (!inode) 2186 return ERR_PTR(-ENOMEM); 2187 2188 DPRINT(("new inode ino=%ld @%p\n", inode->i_ino, inode)); 2189 2190 inode->i_mode = S_IFCHR|S_IRUGO; 2191 inode->i_uid = current_fsuid(); 2192 inode->i_gid = current_fsgid(); 2193 2194 /* 2195 * allocate a new dcache entry 2196 */ 2197 path.dentry = d_alloc(pfmfs_mnt->mnt_root, &this); 2198 if (!path.dentry) { 2199 iput(inode); 2200 return ERR_PTR(-ENOMEM); 2201 } 2202 path.mnt = mntget(pfmfs_mnt); 2203 2204 d_add(path.dentry, inode); 2205 2206 file = alloc_file(&path, FMODE_READ, &pfm_file_ops); 2207 if (IS_ERR(file)) { 2208 path_put(&path); 2209 return file; 2210 } 2211 2212 file->f_flags = O_RDONLY; 2213 file->private_data = ctx; 2214 2215 return file; 2216 } 2217 2218 static int 2219 pfm_remap_buffer(struct vm_area_struct *vma, unsigned long buf, unsigned long addr, unsigned long size) 2220 { 2221 DPRINT(("CPU%d buf=0x%lx addr=0x%lx size=%ld\n", smp_processor_id(), buf, addr, size)); 2222 2223 while (size > 0) { 2224 unsigned long pfn = ia64_tpa(buf) >> PAGE_SHIFT; 2225 2226 2227 if (remap_pfn_range(vma, addr, pfn, PAGE_SIZE, PAGE_READONLY)) 2228 return -ENOMEM; 2229 2230 addr += PAGE_SIZE; 2231 buf += PAGE_SIZE; 2232 size -= PAGE_SIZE; 2233 } 2234 return 0; 2235 } 2236 2237 /* 2238 * allocate a sampling buffer and remaps it into the user address space of the task 2239 */ 2240 static int 2241 pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned long rsize, void **user_vaddr) 2242 { 2243 struct mm_struct *mm = task->mm; 2244 struct vm_area_struct *vma = NULL; 2245 unsigned long size; 2246 void *smpl_buf; 2247 2248 2249 /* 2250 * the fixed header + requested size and align to page boundary 2251 */ 2252 size = PAGE_ALIGN(rsize); 2253 2254 DPRINT(("sampling buffer rsize=%lu size=%lu bytes\n", rsize, size)); 2255 2256 /* 2257 * check requested size to avoid Denial-of-service attacks 2258 * XXX: may have to refine this test 2259 * Check against address space limit. 2260 * 2261 * if ((mm->total_vm << PAGE_SHIFT) + len> task->rlim[RLIMIT_AS].rlim_cur) 2262 * return -ENOMEM; 2263 */ 2264 if (size > task_rlimit(task, RLIMIT_MEMLOCK)) 2265 return -ENOMEM; 2266 2267 /* 2268 * We do the easy to undo allocations first. 
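 *
 * A rough sketch of what pfm_rvmalloc() is assumed to do (the real helper
 * is defined elsewhere in this file):
 *
 *	void *mem = vzalloc(size);	// zeroed, page-aligned backing store
 *	// every backing page is then marked reserved so that it can later
 *	// be handed to remap_pfn_range() by pfm_remap_buffer()
 *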
2269 * 2270 * pfm_rvmalloc(), clears the buffer, so there is no leak 2271 */ 2272 smpl_buf = pfm_rvmalloc(size); 2273 if (smpl_buf == NULL) { 2274 DPRINT(("Can't allocate sampling buffer\n")); 2275 return -ENOMEM; 2276 } 2277 2278 DPRINT(("smpl_buf @%p\n", smpl_buf)); 2279 2280 /* allocate vma */ 2281 vma = vm_area_alloc(mm); 2282 if (!vma) { 2283 DPRINT(("Cannot allocate vma\n")); 2284 goto error_kmem; 2285 } 2286 2287 /* 2288 * partially initialize the vma for the sampling buffer 2289 */ 2290 vma->vm_file = get_file(filp); 2291 vma->vm_flags = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP; 2292 vma->vm_page_prot = PAGE_READONLY; /* XXX may need to change */ 2293 2294 /* 2295 * Now we have everything we need and we can initialize 2296 * and connect all the data structures 2297 */ 2298 2299 ctx->ctx_smpl_hdr = smpl_buf; 2300 ctx->ctx_smpl_size = size; /* aligned size */ 2301 2302 /* 2303 * Let's do the difficult operations next. 2304 * 2305 * now we atomically find some area in the address space and 2306 * remap the buffer in it. 2307 */ 2308 down_write(&task->mm->mmap_sem); 2309 2310 /* find some free area in address space, must have mmap sem held */ 2311 vma->vm_start = get_unmapped_area(NULL, 0, size, 0, MAP_PRIVATE|MAP_ANONYMOUS); 2312 if (IS_ERR_VALUE(vma->vm_start)) { 2313 DPRINT(("Cannot find unmapped area for size %ld\n", size)); 2314 up_write(&task->mm->mmap_sem); 2315 goto error; 2316 } 2317 vma->vm_end = vma->vm_start + size; 2318 vma->vm_pgoff = vma->vm_start >> PAGE_SHIFT; 2319 2320 DPRINT(("aligned size=%ld, hdr=%p mapped @0x%lx\n", size, ctx->ctx_smpl_hdr, vma->vm_start)); 2321 2322 /* can only be applied to current task, need to have the mm semaphore held when called */ 2323 if (pfm_remap_buffer(vma, (unsigned long)smpl_buf, vma->vm_start, size)) { 2324 DPRINT(("Can't remap buffer\n")); 2325 up_write(&task->mm->mmap_sem); 2326 goto error; 2327 } 2328 2329 /* 2330 * now insert the vma in the vm list for the process, must be 2331 * done with mmap lock held 2332 */ 2333 insert_vm_struct(mm, vma); 2334 2335 vm_stat_account(vma->vm_mm, vma->vm_flags, vma_pages(vma)); 2336 up_write(&task->mm->mmap_sem); 2337 2338 /* 2339 * keep track of user level virtual address 2340 */ 2341 ctx->ctx_smpl_vaddr = (void *)vma->vm_start; 2342 *(unsigned long *)user_vaddr = vma->vm_start; 2343 2344 return 0; 2345 2346 error: 2347 vm_area_free(vma); 2348 error_kmem: 2349 pfm_rvfree(smpl_buf, size); 2350 2351 return -ENOMEM; 2352 } 2353 2354 /* 2355 * XXX: do something better here 2356 */ 2357 static int 2358 pfm_bad_permissions(struct task_struct *task) 2359 { 2360 const struct cred *tcred; 2361 kuid_t uid = current_uid(); 2362 kgid_t gid = current_gid(); 2363 int ret; 2364 2365 rcu_read_lock(); 2366 tcred = __task_cred(task); 2367 2368 /* inspired by ptrace_attach() */ 2369 DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", 2370 from_kuid(&init_user_ns, uid), 2371 from_kgid(&init_user_ns, gid), 2372 from_kuid(&init_user_ns, tcred->euid), 2373 from_kuid(&init_user_ns, tcred->suid), 2374 from_kuid(&init_user_ns, tcred->uid), 2375 from_kgid(&init_user_ns, tcred->egid), 2376 from_kgid(&init_user_ns, tcred->sgid))); 2377 2378 ret = ((!uid_eq(uid, tcred->euid)) 2379 || (!uid_eq(uid, tcred->suid)) 2380 || (!uid_eq(uid, tcred->uid)) 2381 || (!gid_eq(gid, tcred->egid)) 2382 || (!gid_eq(gid, tcred->sgid)) 2383 || (!gid_eq(gid, tcred->gid))) && !capable(CAP_SYS_PTRACE); 2384 2385 rcu_read_unlock(); 2386 return ret; 2387 } 2388 2389 static int 2390 pfarg_is_sane(struct task_struct 
*task, pfarg_context_t *pfx)
{
	int ctx_flags;

	/* valid signal */

	ctx_flags = pfx->ctx_flags;

	if (ctx_flags & PFM_FL_SYSTEM_WIDE) {

		/*
		 * cannot block in this mode
		 */
		if (ctx_flags & PFM_FL_NOTIFY_BLOCK) {
			DPRINT(("cannot use blocking mode when in system wide monitoring\n"));
			return -EINVAL;
		}
	} else {
	}
	/* probably more to add here */

	return 0;
}

static int
pfm_setup_buffer_fmt(struct task_struct *task, struct file *filp, pfm_context_t *ctx, unsigned int ctx_flags,
		     unsigned int cpu, pfarg_context_t *arg)
{
	pfm_buffer_fmt_t *fmt = NULL;
	unsigned long size = 0UL;
	void *uaddr = NULL;
	void *fmt_arg = NULL;
	int ret = 0;
#define PFM_CTXARG_BUF_ARG(a) (pfm_buffer_fmt_t *)(a+1)

	/* invoke and lock buffer format, if found */
	fmt = pfm_find_buffer_fmt(arg->ctx_smpl_buf_id);
	if (fmt == NULL) {
		DPRINT(("[%d] cannot find buffer format\n", task_pid_nr(task)));
		return -EINVAL;
	}

	/*
	 * buffer argument MUST be contiguous to pfarg_context_t
	 */
	if (fmt->fmt_arg_size) fmt_arg = PFM_CTXARG_BUF_ARG(arg);

	ret = pfm_buf_fmt_validate(fmt, task, ctx_flags, cpu, fmt_arg);

	DPRINT(("[%d] after validate(0x%x,%d,%p)=%d\n", task_pid_nr(task), ctx_flags, cpu, fmt_arg, ret));

	if (ret) goto error;

	/* link buffer format and context */
	ctx->ctx_buf_fmt = fmt;
	ctx->ctx_fl_is_sampling = 1; /* assume record() is defined */

	/*
	 * check if buffer format wants to use perfmon buffer allocation/mapping service
	 */
	ret = pfm_buf_fmt_getsize(fmt, task, ctx_flags, cpu, fmt_arg, &size);
	if (ret) goto error;

	if (size) {
		/*
		 * buffer is always remapped into the caller's address space
		 */
		ret = pfm_smpl_buffer_alloc(current, filp, ctx, size, &uaddr);
		if (ret) goto error;

		/* keep track of user address of buffer */
		arg->ctx_smpl_vaddr = uaddr;
	}
	ret = pfm_buf_fmt_init(fmt, task, ctx->ctx_smpl_hdr, ctx_flags, cpu, fmt_arg);

error:
	return ret;
}

static void
pfm_reset_pmu_state(pfm_context_t *ctx)
{
	int i;

	/*
	 * install reset values for PMC.
	 */
	for (i=1; PMC_IS_LAST(i) == 0; i++) {
		if (PMC_IS_IMPL(i) == 0) continue;
		ctx->ctx_pmcs[i] = PMC_DFL_VAL(i);
		DPRINT(("pmc[%d]=0x%lx\n", i, ctx->ctx_pmcs[i]));
	}
	/*
	 * PMD registers are set to 0UL when the context is cleared with memset()
	 */

	/*
	 * On context switch restore, we must restore ALL pmc and ALL pmd even
	 * when they are not actively used by the task. In UP, the incoming process
	 * may otherwise pick up left over PMC, PMD state from the previous process.
	 * As opposed to PMD, stale PMC can cause harm to the incoming
	 * process because they may change what is being measured.
	 * Therefore, we must systematically reinstall the entire
	 * PMC state. In SMP, the same thing can happen on the
	 * same CPU but also between 2 CPUs.
	 *
	 * The problem with PMD is information leaking, especially
	 * to user level when psr.sp=0
	 *
	 * There is unfortunately no easy way to avoid this problem
	 * on either UP or SMP. This definitely slows down the
	 * pfm_load_regs() function.
	 */

	/*
	 * bitmask of all PMCs accessible to this context
	 *
	 * PMC0 is treated differently.
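	 *
	 * Worked example with a hypothetical PMU implementing PMC0-PMC7
	 * (impl_pmcs[0] == 0xff): the assignment below yields
	 * ctx_all_pmcs[0] == 0xfe, i.e. every implemented PMC except PMC0,
	 * which the PMU uses as a status register and which therefore stays
	 * under kernel control.
	 *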
	 */
	ctx->ctx_all_pmcs[0] = pmu_conf->impl_pmcs[0] & ~0x1;

	/*
	 * bitmask of all PMDs that are accessible to this context
	 */
	ctx->ctx_all_pmds[0] = pmu_conf->impl_pmds[0];

	DPRINT(("<%d> all_pmcs=0x%lx all_pmds=0x%lx\n", ctx->ctx_fd, ctx->ctx_all_pmcs[0], ctx->ctx_all_pmds[0]));

	/*
	 * useful in case of re-enable after disable
	 */
	ctx->ctx_used_ibrs[0] = 0UL;
	ctx->ctx_used_dbrs[0] = 0UL;
}

static int
pfm_ctx_getsize(void *arg, size_t *sz)
{
	pfarg_context_t *req = (pfarg_context_t *)arg;
	pfm_buffer_fmt_t *fmt;

	*sz = 0;

	if (!pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) return 0;

	fmt = pfm_find_buffer_fmt(req->ctx_smpl_buf_id);
	if (fmt == NULL) {
		DPRINT(("cannot find buffer format\n"));
		return -EINVAL;
	}
	/* get just enough to copy in user parameters */
	*sz = fmt->fmt_arg_size;
	DPRINT(("arg_size=%lu\n", *sz));

	return 0;
}



/*
 * cannot attach if:
 *	- kernel task
 *	- task not owned by caller
 *	- task incompatible with context mode
 */
static int
pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task)
{
	/*
	 * reject kernel tasks and tasks not owned by the caller
	 */
	if (task->mm == NULL) {
		DPRINT(("task [%d] has no memory context (kernel thread)\n", task_pid_nr(task)));
		return -EPERM;
	}
	if (pfm_bad_permissions(task)) {
		DPRINT(("no permission to attach to [%d]\n", task_pid_nr(task)));
		return -EPERM;
	}
	/*
	 * cannot block in self-monitoring mode
	 */
	if (CTX_OVFL_NOBLOCK(ctx) == 0 && task == current) {
		DPRINT(("cannot load a blocking context on self for [%d]\n", task_pid_nr(task)));
		return -EINVAL;
	}

	if (task->exit_state == EXIT_ZOMBIE) {
		DPRINT(("cannot attach to zombie task [%d]\n", task_pid_nr(task)));
		return -EBUSY;
	}

	/*
	 * always ok for self
	 */
	if (task == current) return 0;

	if (!task_is_stopped_or_traced(task)) {
		DPRINT(("cannot attach to non-stopped task [%d] state=%ld\n", task_pid_nr(task), task->state));
		return -EBUSY;
	}
	/*
	 * make sure the task is off any CPU
	 */
	wait_task_inactive(task, 0);

	/* more to come...
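	 *
	 * For reference, these checks are reached through the following call
	 * chain when user space attaches a context to a task:
	 *
	 *	pfm_context_load()
	 *	   -> pfm_get_task(ctx, req->load_pid, &task)
	 *	        -> pfm_task_incompatible(ctx, p)
	 *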
*/ 2597 2598 return 0; 2599 } 2600 2601 static int 2602 pfm_get_task(pfm_context_t *ctx, pid_t pid, struct task_struct **task) 2603 { 2604 struct task_struct *p = current; 2605 int ret; 2606 2607 /* XXX: need to add more checks here */ 2608 if (pid < 2) return -EPERM; 2609 2610 if (pid != task_pid_vnr(current)) { 2611 /* make sure task cannot go away while we operate on it */ 2612 p = find_get_task_by_vpid(pid); 2613 if (!p) 2614 return -ESRCH; 2615 } 2616 2617 ret = pfm_task_incompatible(ctx, p); 2618 if (ret == 0) { 2619 *task = p; 2620 } else if (p != current) { 2621 pfm_put_task(p); 2622 } 2623 return ret; 2624 } 2625 2626 2627 2628 static int 2629 pfm_context_create(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2630 { 2631 pfarg_context_t *req = (pfarg_context_t *)arg; 2632 struct file *filp; 2633 struct path path; 2634 int ctx_flags; 2635 int fd; 2636 int ret; 2637 2638 /* let's check the arguments first */ 2639 ret = pfarg_is_sane(current, req); 2640 if (ret < 0) 2641 return ret; 2642 2643 ctx_flags = req->ctx_flags; 2644 2645 ret = -ENOMEM; 2646 2647 fd = get_unused_fd_flags(0); 2648 if (fd < 0) 2649 return fd; 2650 2651 ctx = pfm_context_alloc(ctx_flags); 2652 if (!ctx) 2653 goto error; 2654 2655 filp = pfm_alloc_file(ctx); 2656 if (IS_ERR(filp)) { 2657 ret = PTR_ERR(filp); 2658 goto error_file; 2659 } 2660 2661 req->ctx_fd = ctx->ctx_fd = fd; 2662 2663 /* 2664 * does the user want to sample? 2665 */ 2666 if (pfm_uuid_cmp(req->ctx_smpl_buf_id, pfm_null_uuid)) { 2667 ret = pfm_setup_buffer_fmt(current, filp, ctx, ctx_flags, 0, req); 2668 if (ret) 2669 goto buffer_error; 2670 } 2671 2672 DPRINT(("ctx=%p flags=0x%x system=%d notify_block=%d excl_idle=%d no_msg=%d ctx_fd=%d\n", 2673 ctx, 2674 ctx_flags, 2675 ctx->ctx_fl_system, 2676 ctx->ctx_fl_block, 2677 ctx->ctx_fl_excl_idle, 2678 ctx->ctx_fl_no_msg, 2679 ctx->ctx_fd)); 2680 2681 /* 2682 * initialize soft PMU state 2683 */ 2684 pfm_reset_pmu_state(ctx); 2685 2686 fd_install(fd, filp); 2687 2688 return 0; 2689 2690 buffer_error: 2691 path = filp->f_path; 2692 put_filp(filp); 2693 path_put(&path); 2694 2695 if (ctx->ctx_buf_fmt) { 2696 pfm_buf_fmt_exit(ctx->ctx_buf_fmt, current, NULL, regs); 2697 } 2698 error_file: 2699 pfm_context_free(ctx); 2700 2701 error: 2702 put_unused_fd(fd); 2703 return ret; 2704 } 2705 2706 static inline unsigned long 2707 pfm_new_counter_value (pfm_counter_t *reg, int is_long_reset) 2708 { 2709 unsigned long val = is_long_reset ? reg->long_reset : reg->short_reset; 2710 unsigned long new_seed, old_seed = reg->seed, mask = reg->mask; 2711 extern unsigned long carta_random32 (unsigned long seed); 2712 2713 if (reg->flags & PFM_REGFL_RANDOM) { 2714 new_seed = carta_random32(old_seed); 2715 val -= (old_seed & mask); /* counter values are negative numbers! 
*/ 2716 if ((mask >> 32) != 0) 2717 /* construct a full 64-bit random value: */ 2718 new_seed |= carta_random32(old_seed >> 32) << 32; 2719 reg->seed = new_seed; 2720 } 2721 reg->lval = val; 2722 return val; 2723 } 2724 2725 static void 2726 pfm_reset_regs_masked(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2727 { 2728 unsigned long mask = ovfl_regs[0]; 2729 unsigned long reset_others = 0UL; 2730 unsigned long val; 2731 int i; 2732 2733 /* 2734 * now restore reset value on sampling overflowed counters 2735 */ 2736 mask >>= PMU_FIRST_COUNTER; 2737 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2738 2739 if ((mask & 0x1UL) == 0UL) continue; 2740 2741 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2742 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2743 2744 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2745 } 2746 2747 /* 2748 * Now take care of resetting the other registers 2749 */ 2750 for(i = 0; reset_others; i++, reset_others >>= 1) { 2751 2752 if ((reset_others & 0x1) == 0) continue; 2753 2754 ctx->ctx_pmds[i].val = val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2755 2756 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2757 is_long_reset ? "long" : "short", i, val)); 2758 } 2759 } 2760 2761 static void 2762 pfm_reset_regs(pfm_context_t *ctx, unsigned long *ovfl_regs, int is_long_reset) 2763 { 2764 unsigned long mask = ovfl_regs[0]; 2765 unsigned long reset_others = 0UL; 2766 unsigned long val; 2767 int i; 2768 2769 DPRINT_ovfl(("ovfl_regs=0x%lx is_long_reset=%d\n", ovfl_regs[0], is_long_reset)); 2770 2771 if (ctx->ctx_state == PFM_CTX_MASKED) { 2772 pfm_reset_regs_masked(ctx, ovfl_regs, is_long_reset); 2773 return; 2774 } 2775 2776 /* 2777 * now restore reset value on sampling overflowed counters 2778 */ 2779 mask >>= PMU_FIRST_COUNTER; 2780 for(i = PMU_FIRST_COUNTER; mask; i++, mask >>= 1) { 2781 2782 if ((mask & 0x1UL) == 0UL) continue; 2783 2784 val = pfm_new_counter_value(ctx->ctx_pmds+ i, is_long_reset); 2785 reset_others |= ctx->ctx_pmds[i].reset_pmds[0]; 2786 2787 DPRINT_ovfl((" %s reset ctx_pmds[%d]=%lx\n", is_long_reset ? "long" : "short", i, val)); 2788 2789 pfm_write_soft_counter(ctx, i, val); 2790 } 2791 2792 /* 2793 * Now take care of resetting the other registers 2794 */ 2795 for(i = 0; reset_others; i++, reset_others >>= 1) { 2796 2797 if ((reset_others & 0x1) == 0) continue; 2798 2799 val = pfm_new_counter_value(ctx->ctx_pmds + i, is_long_reset); 2800 2801 if (PMD_IS_COUNTING(i)) { 2802 pfm_write_soft_counter(ctx, i, val); 2803 } else { 2804 ia64_set_pmd(i, val); 2805 } 2806 DPRINT_ovfl(("%s reset_others pmd[%d]=%lx\n", 2807 is_long_reset ? "long" : "short", i, val)); 2808 } 2809 ia64_srlz_d(); 2810 } 2811 2812 static int 2813 pfm_write_pmcs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 2814 { 2815 struct task_struct *task; 2816 pfarg_reg_t *req = (pfarg_reg_t *)arg; 2817 unsigned long value, pmc_pm; 2818 unsigned long smpl_pmds, reset_pmds, impl_pmds; 2819 unsigned int cnum, reg_flags, flags, pmc_type; 2820 int i, can_access_pmu = 0, is_loaded, is_system, expert_mode; 2821 int is_monitor, is_counting, state; 2822 int ret = -EINVAL; 2823 pfm_reg_check_t wr_func; 2824 #define PFM_CHECK_PMC_PM(x, y, z) ((x)->ctx_fl_system ^ PMC_PM(y, z)) 2825 2826 state = ctx->ctx_state; 2827 is_loaded = state == PFM_CTX_LOADED ? 
1 : 0; 2828 is_system = ctx->ctx_fl_system; 2829 task = ctx->ctx_task; 2830 impl_pmds = pmu_conf->impl_pmds[0]; 2831 2832 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 2833 2834 if (is_loaded) { 2835 /* 2836 * In system wide and when the context is loaded, access can only happen 2837 * when the caller is running on the CPU being monitored by the session. 2838 * It does not have to be the owner (ctx_task) of the context per se. 2839 */ 2840 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 2841 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 2842 return -EBUSY; 2843 } 2844 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 2845 } 2846 expert_mode = pfm_sysctl.expert_mode; 2847 2848 for (i = 0; i < count; i++, req++) { 2849 2850 cnum = req->reg_num; 2851 reg_flags = req->reg_flags; 2852 value = req->reg_value; 2853 smpl_pmds = req->reg_smpl_pmds[0]; 2854 reset_pmds = req->reg_reset_pmds[0]; 2855 flags = 0; 2856 2857 2858 if (cnum >= PMU_MAX_PMCS) { 2859 DPRINT(("pmc%u is invalid\n", cnum)); 2860 goto error; 2861 } 2862 2863 pmc_type = pmu_conf->pmc_desc[cnum].type; 2864 pmc_pm = (value >> pmu_conf->pmc_desc[cnum].pm_pos) & 0x1; 2865 is_counting = (pmc_type & PFM_REG_COUNTING) == PFM_REG_COUNTING ? 1 : 0; 2866 is_monitor = (pmc_type & PFM_REG_MONITOR) == PFM_REG_MONITOR ? 1 : 0; 2867 2868 /* 2869 * we reject all non implemented PMC as well 2870 * as attempts to modify PMC[0-3] which are used 2871 * as status registers by the PMU 2872 */ 2873 if ((pmc_type & PFM_REG_IMPL) == 0 || (pmc_type & PFM_REG_CONTROL) == PFM_REG_CONTROL) { 2874 DPRINT(("pmc%u is unimplemented or no-access pmc_type=%x\n", cnum, pmc_type)); 2875 goto error; 2876 } 2877 wr_func = pmu_conf->pmc_desc[cnum].write_check; 2878 /* 2879 * If the PMC is a monitor, then if the value is not the default: 2880 * - system-wide session: PMCx.pm=1 (privileged monitor) 2881 * - per-task : PMCx.pm=0 (user monitor) 2882 */ 2883 if (is_monitor && value != PMC_DFL_VAL(cnum) && is_system ^ pmc_pm) { 2884 DPRINT(("pmc%u pmc_pm=%lu is_system=%d\n", 2885 cnum, 2886 pmc_pm, 2887 is_system)); 2888 goto error; 2889 } 2890 2891 if (is_counting) { 2892 /* 2893 * enforce generation of overflow interrupt. Necessary on all 2894 * CPUs. 
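			 *
			 * With pmc.oi set, a hardware counter wrap raises the PMU overflow
			 * interrupt and the overflow is folded into the 64-bit soft counter,
			 * so the full value later returned by pfm_read_pmds() is effectively:
			 *
			 *	full = ctx_pmds[cnum].val + (ia64_get_pmd(cnum) & pmu_conf->ovfl_val)
			 *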
			 */
			value |= 1 << PMU_PMC_OI;

			if (reg_flags & PFM_REGFL_OVFL_NOTIFY) {
				flags |= PFM_REGFL_OVFL_NOTIFY;
			}

			if (reg_flags & PFM_REGFL_RANDOM) flags |= PFM_REGFL_RANDOM;

			/* verify validity of smpl_pmds */
			if ((smpl_pmds & impl_pmds) != smpl_pmds) {
				DPRINT(("invalid smpl_pmds 0x%lx for pmc%u\n", smpl_pmds, cnum));
				goto error;
			}

			/* verify validity of reset_pmds */
			if ((reset_pmds & impl_pmds) != reset_pmds) {
				DPRINT(("invalid reset_pmds 0x%lx for pmc%u\n", reset_pmds, cnum));
				goto error;
			}
		} else {
			if (reg_flags & (PFM_REGFL_OVFL_NOTIFY|PFM_REGFL_RANDOM)) {
				DPRINT(("cannot set ovfl_notify or random on pmc%u\n", cnum));
				goto error;
			}
			/* eventid on non-counting monitors is ignored */
		}

		/*
		 * execute write checker, if any
		 */
		if (likely(expert_mode == 0 && wr_func)) {
			ret = (*wr_func)(task, ctx, cnum, &value, regs);
			if (ret) goto error;
			ret = -EINVAL;
		}

		/*
		 * no error on this register
		 */
		PFM_REG_RETFLAG_SET(req->reg_flags, 0);

		/*
		 * Now we commit the changes to the software state
		 */

		/*
		 * update overflow information
		 */
		if (is_counting) {
			/*
			 * full flag update each time a register is programmed
			 */
			ctx->ctx_pmds[cnum].flags = flags;

			ctx->ctx_pmds[cnum].reset_pmds[0] = reset_pmds;
			ctx->ctx_pmds[cnum].smpl_pmds[0] = smpl_pmds;
			ctx->ctx_pmds[cnum].eventid = req->reg_smpl_eventid;

			/*
			 * Mark all PMDs to be accessed as used.
			 *
			 * We do not keep track of PMC because we have to
			 * systematically restore ALL of them.
			 *
			 * We do not update the used_monitors mask, because
			 * if we have not programmed them, then they will be in
			 * a quiescent state, therefore we will not need to
			 * mask/restore them when the context is MASKED.
			 */
			CTX_USED_PMD(ctx, reset_pmds);
			CTX_USED_PMD(ctx, smpl_pmds);
			/*
			 * make sure we do not try to reset on
			 * restart because we have established new values
			 */
			if (state == PFM_CTX_MASKED) ctx->ctx_ovfl_regs[0] &= ~1UL << cnum;
		}
		/*
		 * Needed in case the user does not initialize the equivalent
		 * PMD. Clearing is done indirectly via pfm_reset_pmu_state() so there is no
		 * possible leak here.
		 */
		CTX_USED_PMD(ctx, pmu_conf->pmc_desc[cnum].dep_pmd[0]);

		/*
		 * keep track of the monitor PMC that we are using.
		 * we save the value of the pmc in ctx_pmcs[] and if
		 * the monitoring is not stopped for the context we also
		 * place it in the saved state area so that it will be
		 * picked up later by the context switch code.
		 *
		 * The value in ctx_pmcs[] can only be changed in pfm_write_pmcs().
		 *
		 * The value in th_pmcs[] may be modified on overflow, i.e., when
		 * monitoring needs to be stopped.
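		 *
		 * In short, for the code that follows:
		 *
		 *	ctx->ctx_pmcs[cnum]	master copy, only updated here
		 *	ctx->th_pmcs[cnum]	per-thread ctxsw image, may be rewritten
		 *				when monitoring gets masked on overflow
		 *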
2991 */ 2992 if (is_monitor) CTX_USED_MONITOR(ctx, 1UL << cnum); 2993 2994 /* 2995 * update context state 2996 */ 2997 ctx->ctx_pmcs[cnum] = value; 2998 2999 if (is_loaded) { 3000 /* 3001 * write thread state 3002 */ 3003 if (is_system == 0) ctx->th_pmcs[cnum] = value; 3004 3005 /* 3006 * write hardware register if we can 3007 */ 3008 if (can_access_pmu) { 3009 ia64_set_pmc(cnum, value); 3010 } 3011 #ifdef CONFIG_SMP 3012 else { 3013 /* 3014 * per-task SMP only here 3015 * 3016 * we are guaranteed that the task is not running on the other CPU, 3017 * we indicate that this PMD will need to be reloaded if the task 3018 * is rescheduled on the CPU it ran last on. 3019 */ 3020 ctx->ctx_reload_pmcs[0] |= 1UL << cnum; 3021 } 3022 #endif 3023 } 3024 3025 DPRINT(("pmc[%u]=0x%lx ld=%d apmu=%d flags=0x%x all_pmcs=0x%lx used_pmds=0x%lx eventid=%ld smpl_pmds=0x%lx reset_pmds=0x%lx reloads_pmcs=0x%lx used_monitors=0x%lx ovfl_regs=0x%lx\n", 3026 cnum, 3027 value, 3028 is_loaded, 3029 can_access_pmu, 3030 flags, 3031 ctx->ctx_all_pmcs[0], 3032 ctx->ctx_used_pmds[0], 3033 ctx->ctx_pmds[cnum].eventid, 3034 smpl_pmds, 3035 reset_pmds, 3036 ctx->ctx_reload_pmcs[0], 3037 ctx->ctx_used_monitors[0], 3038 ctx->ctx_ovfl_regs[0])); 3039 } 3040 3041 /* 3042 * make sure the changes are visible 3043 */ 3044 if (can_access_pmu) ia64_srlz_d(); 3045 3046 return 0; 3047 error: 3048 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3049 return ret; 3050 } 3051 3052 static int 3053 pfm_write_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3054 { 3055 struct task_struct *task; 3056 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3057 unsigned long value, hw_value, ovfl_mask; 3058 unsigned int cnum; 3059 int i, can_access_pmu = 0, state; 3060 int is_counting, is_loaded, is_system, expert_mode; 3061 int ret = -EINVAL; 3062 pfm_reg_check_t wr_func; 3063 3064 3065 state = ctx->ctx_state; 3066 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3067 is_system = ctx->ctx_fl_system; 3068 ovfl_mask = pmu_conf->ovfl_val; 3069 task = ctx->ctx_task; 3070 3071 if (unlikely(state == PFM_CTX_ZOMBIE)) return -EINVAL; 3072 3073 /* 3074 * on both UP and SMP, we can only write to the PMC when the task is 3075 * the owner of the local PMU. 3076 */ 3077 if (likely(is_loaded)) { 3078 /* 3079 * In system wide and when the context is loaded, access can only happen 3080 * when the caller is running on the CPU being monitored by the session. 3081 * It does not have to be the owner (ctx_task) of the context per se. 3082 */ 3083 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3084 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3085 return -EBUSY; 3086 } 3087 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 
1 : 0; 3088 } 3089 expert_mode = pfm_sysctl.expert_mode; 3090 3091 for (i = 0; i < count; i++, req++) { 3092 3093 cnum = req->reg_num; 3094 value = req->reg_value; 3095 3096 if (!PMD_IS_IMPL(cnum)) { 3097 DPRINT(("pmd[%u] is unimplemented or invalid\n", cnum)); 3098 goto abort_mission; 3099 } 3100 is_counting = PMD_IS_COUNTING(cnum); 3101 wr_func = pmu_conf->pmd_desc[cnum].write_check; 3102 3103 /* 3104 * execute write checker, if any 3105 */ 3106 if (unlikely(expert_mode == 0 && wr_func)) { 3107 unsigned long v = value; 3108 3109 ret = (*wr_func)(task, ctx, cnum, &v, regs); 3110 if (ret) goto abort_mission; 3111 3112 value = v; 3113 ret = -EINVAL; 3114 } 3115 3116 /* 3117 * no error on this register 3118 */ 3119 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 3120 3121 /* 3122 * now commit changes to software state 3123 */ 3124 hw_value = value; 3125 3126 /* 3127 * update virtualized (64bits) counter 3128 */ 3129 if (is_counting) { 3130 /* 3131 * write context state 3132 */ 3133 ctx->ctx_pmds[cnum].lval = value; 3134 3135 /* 3136 * when context is load we use the split value 3137 */ 3138 if (is_loaded) { 3139 hw_value = value & ovfl_mask; 3140 value = value & ~ovfl_mask; 3141 } 3142 } 3143 /* 3144 * update reset values (not just for counters) 3145 */ 3146 ctx->ctx_pmds[cnum].long_reset = req->reg_long_reset; 3147 ctx->ctx_pmds[cnum].short_reset = req->reg_short_reset; 3148 3149 /* 3150 * update randomization parameters (not just for counters) 3151 */ 3152 ctx->ctx_pmds[cnum].seed = req->reg_random_seed; 3153 ctx->ctx_pmds[cnum].mask = req->reg_random_mask; 3154 3155 /* 3156 * update context value 3157 */ 3158 ctx->ctx_pmds[cnum].val = value; 3159 3160 /* 3161 * Keep track of what we use 3162 * 3163 * We do not keep track of PMC because we have to 3164 * systematically restore ALL of them. 3165 */ 3166 CTX_USED_PMD(ctx, PMD_PMD_DEP(cnum)); 3167 3168 /* 3169 * mark this PMD register used as well 3170 */ 3171 CTX_USED_PMD(ctx, RDEP(cnum)); 3172 3173 /* 3174 * make sure we do not try to reset on 3175 * restart because we have established new values 3176 */ 3177 if (is_counting && state == PFM_CTX_MASKED) { 3178 ctx->ctx_ovfl_regs[0] &= ~1UL << cnum; 3179 } 3180 3181 if (is_loaded) { 3182 /* 3183 * write thread state 3184 */ 3185 if (is_system == 0) ctx->th_pmds[cnum] = hw_value; 3186 3187 /* 3188 * write hardware register if we can 3189 */ 3190 if (can_access_pmu) { 3191 ia64_set_pmd(cnum, hw_value); 3192 } else { 3193 #ifdef CONFIG_SMP 3194 /* 3195 * we are guaranteed that the task is not running on the other CPU, 3196 * we indicate that this PMD will need to be reloaded if the task 3197 * is rescheduled on the CPU it ran last on. 3198 */ 3199 ctx->ctx_reload_pmds[0] |= 1UL << cnum; 3200 #endif 3201 } 3202 } 3203 3204 DPRINT(("pmd[%u]=0x%lx ld=%d apmu=%d, hw_value=0x%lx ctx_pmd=0x%lx short_reset=0x%lx " 3205 "long_reset=0x%lx notify=%c seed=0x%lx mask=0x%lx used_pmds=0x%lx reset_pmds=0x%lx reload_pmds=0x%lx all_pmds=0x%lx ovfl_regs=0x%lx\n", 3206 cnum, 3207 value, 3208 is_loaded, 3209 can_access_pmu, 3210 hw_value, 3211 ctx->ctx_pmds[cnum].val, 3212 ctx->ctx_pmds[cnum].short_reset, 3213 ctx->ctx_pmds[cnum].long_reset, 3214 PMC_OVFL_NOTIFY(ctx, cnum) ? 
'Y':'N', 3215 ctx->ctx_pmds[cnum].seed, 3216 ctx->ctx_pmds[cnum].mask, 3217 ctx->ctx_used_pmds[0], 3218 ctx->ctx_pmds[cnum].reset_pmds[0], 3219 ctx->ctx_reload_pmds[0], 3220 ctx->ctx_all_pmds[0], 3221 ctx->ctx_ovfl_regs[0])); 3222 } 3223 3224 /* 3225 * make changes visible 3226 */ 3227 if (can_access_pmu) ia64_srlz_d(); 3228 3229 return 0; 3230 3231 abort_mission: 3232 /* 3233 * for now, we have only one possibility for error 3234 */ 3235 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3236 return ret; 3237 } 3238 3239 /* 3240 * By the way of PROTECT_CONTEXT(), interrupts are masked while we are in this function. 3241 * Therefore we know, we do not have to worry about the PMU overflow interrupt. If an 3242 * interrupt is delivered during the call, it will be kept pending until we leave, making 3243 * it appears as if it had been generated at the UNPROTECT_CONTEXT(). At least we are 3244 * guaranteed to return consistent data to the user, it may simply be old. It is not 3245 * trivial to treat the overflow while inside the call because you may end up in 3246 * some module sampling buffer code causing deadlocks. 3247 */ 3248 static int 3249 pfm_read_pmds(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3250 { 3251 struct task_struct *task; 3252 unsigned long val = 0UL, lval, ovfl_mask, sval; 3253 pfarg_reg_t *req = (pfarg_reg_t *)arg; 3254 unsigned int cnum, reg_flags = 0; 3255 int i, can_access_pmu = 0, state; 3256 int is_loaded, is_system, is_counting, expert_mode; 3257 int ret = -EINVAL; 3258 pfm_reg_check_t rd_func; 3259 3260 /* 3261 * access is possible when loaded only for 3262 * self-monitoring tasks or in UP mode 3263 */ 3264 3265 state = ctx->ctx_state; 3266 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3267 is_system = ctx->ctx_fl_system; 3268 ovfl_mask = pmu_conf->ovfl_val; 3269 task = ctx->ctx_task; 3270 3271 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3272 3273 if (likely(is_loaded)) { 3274 /* 3275 * In system wide and when the context is loaded, access can only happen 3276 * when the caller is running on the CPU being monitored by the session. 3277 * It does not have to be the owner (ctx_task) of the context per se. 3278 */ 3279 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3280 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3281 return -EBUSY; 3282 } 3283 /* 3284 * this can be true when not self-monitoring only in UP 3285 */ 3286 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3287 3288 if (can_access_pmu) ia64_srlz_d(); 3289 } 3290 expert_mode = pfm_sysctl.expert_mode; 3291 3292 DPRINT(("ld=%d apmu=%d ctx_state=%d\n", 3293 is_loaded, 3294 can_access_pmu, 3295 state)); 3296 3297 /* 3298 * on both UP and SMP, we can only read the PMD from the hardware register when 3299 * the task is the owner of the local PMU. 3300 */ 3301 3302 for (i = 0; i < count; i++, req++) { 3303 3304 cnum = req->reg_num; 3305 reg_flags = req->reg_flags; 3306 3307 if (unlikely(!PMD_IS_IMPL(cnum))) goto error; 3308 /* 3309 * we can only read the register that we use. That includes 3310 * the one we explicitly initialize AND the one we want included 3311 * in the sampling buffer (smpl_regs). 
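 *
 * Illustrative user-level read (a sketch; the PFM_READ_PMDS command name is
 * assumed from <asm/perfmon.h> and register number 4 is a made-up example):
 *
 *	pfarg_reg_t rd;
 *	memset(&rd, 0, sizeof(rd));
 *	rd.reg_num = 4;				// must be a PMD this context uses
 *	perfmonctl(fd, PFM_READ_PMDS, &rd, 1);
 *	// rd.reg_value now holds the 64-bit virtualized counter value
 *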
3312 * 3313 * Having this restriction allows optimization in the ctxsw routine 3314 * without compromising security (leaks) 3315 */ 3316 if (unlikely(!CTX_IS_USED_PMD(ctx, cnum))) goto error; 3317 3318 sval = ctx->ctx_pmds[cnum].val; 3319 lval = ctx->ctx_pmds[cnum].lval; 3320 is_counting = PMD_IS_COUNTING(cnum); 3321 3322 /* 3323 * If the task is not the current one, then we check if the 3324 * PMU state is still in the local live register due to lazy ctxsw. 3325 * If true, then we read directly from the registers. 3326 */ 3327 if (can_access_pmu){ 3328 val = ia64_get_pmd(cnum); 3329 } else { 3330 /* 3331 * context has been saved 3332 * if context is zombie, then task does not exist anymore. 3333 * In this case, we use the full value saved in the context (pfm_flush_regs()). 3334 */ 3335 val = is_loaded ? ctx->th_pmds[cnum] : 0UL; 3336 } 3337 rd_func = pmu_conf->pmd_desc[cnum].read_check; 3338 3339 if (is_counting) { 3340 /* 3341 * XXX: need to check for overflow when loaded 3342 */ 3343 val &= ovfl_mask; 3344 val += sval; 3345 } 3346 3347 /* 3348 * execute read checker, if any 3349 */ 3350 if (unlikely(expert_mode == 0 && rd_func)) { 3351 unsigned long v = val; 3352 ret = (*rd_func)(ctx->ctx_task, ctx, cnum, &v, regs); 3353 if (ret) goto error; 3354 val = v; 3355 ret = -EINVAL; 3356 } 3357 3358 PFM_REG_RETFLAG_SET(reg_flags, 0); 3359 3360 DPRINT(("pmd[%u]=0x%lx\n", cnum, val)); 3361 3362 /* 3363 * update register return value, abort all if problem during copy. 3364 * we only modify the reg_flags field. no check mode is fine because 3365 * access has been verified upfront in sys_perfmonctl(). 3366 */ 3367 req->reg_value = val; 3368 req->reg_flags = reg_flags; 3369 req->reg_last_reset_val = lval; 3370 } 3371 3372 return 0; 3373 3374 error: 3375 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 3376 return ret; 3377 } 3378 3379 int 3380 pfm_mod_write_pmcs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3381 { 3382 pfm_context_t *ctx; 3383 3384 if (req == NULL) return -EINVAL; 3385 3386 ctx = GET_PMU_CTX(); 3387 3388 if (ctx == NULL) return -EINVAL; 3389 3390 /* 3391 * for now limit to current task, which is enough when calling 3392 * from overflow handler 3393 */ 3394 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3395 3396 return pfm_write_pmcs(ctx, req, nreq, regs); 3397 } 3398 EXPORT_SYMBOL(pfm_mod_write_pmcs); 3399 3400 int 3401 pfm_mod_read_pmds(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3402 { 3403 pfm_context_t *ctx; 3404 3405 if (req == NULL) return -EINVAL; 3406 3407 ctx = GET_PMU_CTX(); 3408 3409 if (ctx == NULL) return -EINVAL; 3410 3411 /* 3412 * for now limit to current task, which is enough when calling 3413 * from overflow handler 3414 */ 3415 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3416 3417 return pfm_read_pmds(ctx, req, nreq, regs); 3418 } 3419 EXPORT_SYMBOL(pfm_mod_read_pmds); 3420 3421 /* 3422 * Only call this function when a process it trying to 3423 * write the debug registers (reading is always allowed) 3424 */ 3425 int 3426 pfm_use_debug_registers(struct task_struct *task) 3427 { 3428 pfm_context_t *ctx = task->thread.pfm_context; 3429 unsigned long flags; 3430 int ret = 0; 3431 3432 if (pmu_conf->use_rr_dbregs == 0) return 0; 3433 3434 DPRINT(("called for [%d]\n", task_pid_nr(task))); 3435 3436 /* 3437 * do it only once 3438 */ 3439 if (task->thread.flags & IA64_THREAD_DBG_VALID) return 0; 3440 3441 /* 3442 * Even on SMP, we do not need to use an atomic 
here because 3443 * the only way in is via ptrace() and this is possible only when the 3444 * process is stopped. Even in the case where the ctxsw out is not totally 3445 * completed by the time we come here, there is no way the 'stopped' process 3446 * could be in the middle of fiddling with the pfm_write_ibr_dbr() routine. 3447 * So this is always safe. 3448 */ 3449 if (ctx && ctx->ctx_fl_using_dbreg == 1) return -1; 3450 3451 LOCK_PFS(flags); 3452 3453 /* 3454 * We cannot allow setting breakpoints when system wide monitoring 3455 * sessions are using the debug registers. 3456 */ 3457 if (pfm_sessions.pfs_sys_use_dbregs> 0) 3458 ret = -1; 3459 else 3460 pfm_sessions.pfs_ptrace_use_dbregs++; 3461 3462 DPRINT(("ptrace_use_dbregs=%u sys_use_dbregs=%u by [%d] ret = %d\n", 3463 pfm_sessions.pfs_ptrace_use_dbregs, 3464 pfm_sessions.pfs_sys_use_dbregs, 3465 task_pid_nr(task), ret)); 3466 3467 UNLOCK_PFS(flags); 3468 3469 return ret; 3470 } 3471 3472 /* 3473 * This function is called for every task that exits with the 3474 * IA64_THREAD_DBG_VALID set. This indicates a task which was 3475 * able to use the debug registers for debugging purposes via 3476 * ptrace(). Therefore we know it was not using them for 3477 * performance monitoring, so we only decrement the number 3478 * of "ptraced" debug register users to keep the count up to date 3479 */ 3480 int 3481 pfm_release_debug_registers(struct task_struct *task) 3482 { 3483 unsigned long flags; 3484 int ret; 3485 3486 if (pmu_conf->use_rr_dbregs == 0) return 0; 3487 3488 LOCK_PFS(flags); 3489 if (pfm_sessions.pfs_ptrace_use_dbregs == 0) { 3490 printk(KERN_ERR "perfmon: invalid release for [%d] ptrace_use_dbregs=0\n", task_pid_nr(task)); 3491 ret = -1; 3492 } else { 3493 pfm_sessions.pfs_ptrace_use_dbregs--; 3494 ret = 0; 3495 } 3496 UNLOCK_PFS(flags); 3497 3498 return ret; 3499 } 3500 3501 static int 3502 pfm_restart(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3503 { 3504 struct task_struct *task; 3505 pfm_buffer_fmt_t *fmt; 3506 pfm_ovfl_ctrl_t rst_ctrl; 3507 int state, is_system; 3508 int ret = 0; 3509 3510 state = ctx->ctx_state; 3511 fmt = ctx->ctx_buf_fmt; 3512 is_system = ctx->ctx_fl_system; 3513 task = PFM_CTX_TASK(ctx); 3514 3515 switch(state) { 3516 case PFM_CTX_MASKED: 3517 break; 3518 case PFM_CTX_LOADED: 3519 if (CTX_HAS_SMPL(ctx) && fmt->fmt_restart_active) break; 3520 /* fall through */ 3521 case PFM_CTX_UNLOADED: 3522 case PFM_CTX_ZOMBIE: 3523 DPRINT(("invalid state=%d\n", state)); 3524 return -EBUSY; 3525 default: 3526 DPRINT(("state=%d, cannot operate (no active_restart handler)\n", state)); 3527 return -EINVAL; 3528 } 3529 3530 /* 3531 * In system wide and when the context is loaded, access can only happen 3532 * when the caller is running on the CPU being monitored by the session. 3533 * It does not have to be the owner (ctx_task) of the context per se. 
3534 */ 3535 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3536 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3537 return -EBUSY; 3538 } 3539 3540 /* sanity check */ 3541 if (unlikely(task == NULL)) { 3542 printk(KERN_ERR "perfmon: [%d] pfm_restart no task\n", task_pid_nr(current)); 3543 return -EINVAL; 3544 } 3545 3546 if (task == current || is_system) { 3547 3548 fmt = ctx->ctx_buf_fmt; 3549 3550 DPRINT(("restarting self %d ovfl=0x%lx\n", 3551 task_pid_nr(task), 3552 ctx->ctx_ovfl_regs[0])); 3553 3554 if (CTX_HAS_SMPL(ctx)) { 3555 3556 prefetch(ctx->ctx_smpl_hdr); 3557 3558 rst_ctrl.bits.mask_monitoring = 0; 3559 rst_ctrl.bits.reset_ovfl_pmds = 0; 3560 3561 if (state == PFM_CTX_LOADED) 3562 ret = pfm_buf_fmt_restart_active(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3563 else 3564 ret = pfm_buf_fmt_restart(fmt, task, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 3565 } else { 3566 rst_ctrl.bits.mask_monitoring = 0; 3567 rst_ctrl.bits.reset_ovfl_pmds = 1; 3568 } 3569 3570 if (ret == 0) { 3571 if (rst_ctrl.bits.reset_ovfl_pmds) 3572 pfm_reset_regs(ctx, ctx->ctx_ovfl_regs, PFM_PMD_LONG_RESET); 3573 3574 if (rst_ctrl.bits.mask_monitoring == 0) { 3575 DPRINT(("resuming monitoring for [%d]\n", task_pid_nr(task))); 3576 3577 if (state == PFM_CTX_MASKED) pfm_restore_monitoring(task); 3578 } else { 3579 DPRINT(("keeping monitoring stopped for [%d]\n", task_pid_nr(task))); 3580 3581 // cannot use pfm_stop_monitoring(task, regs); 3582 } 3583 } 3584 /* 3585 * clear overflowed PMD mask to remove any stale information 3586 */ 3587 ctx->ctx_ovfl_regs[0] = 0UL; 3588 3589 /* 3590 * back to LOADED state 3591 */ 3592 ctx->ctx_state = PFM_CTX_LOADED; 3593 3594 /* 3595 * XXX: not really useful for self monitoring 3596 */ 3597 ctx->ctx_fl_can_restart = 0; 3598 3599 return 0; 3600 } 3601 3602 /* 3603 * restart another task 3604 */ 3605 3606 /* 3607 * When PFM_CTX_MASKED, we cannot issue a restart before the previous 3608 * one is seen by the task. 3609 */ 3610 if (state == PFM_CTX_MASKED) { 3611 if (ctx->ctx_fl_can_restart == 0) return -EINVAL; 3612 /* 3613 * will prevent subsequent restart before this one is 3614 * seen by other task 3615 */ 3616 ctx->ctx_fl_can_restart = 0; 3617 } 3618 3619 /* 3620 * if blocking, then post the semaphore is PFM_CTX_MASKED, i.e. 3621 * the task is blocked or on its way to block. That's the normal 3622 * restart path. If the monitoring is not masked, then the task 3623 * can be actively monitoring and we cannot directly intervene. 3624 * Therefore we use the trap mechanism to catch the task and 3625 * force it to reset the buffer/reset PMDs. 3626 * 3627 * if non-blocking, then we ensure that the task will go into 3628 * pfm_handle_work() before returning to user mode. 3629 * 3630 * We cannot explicitly reset another task, it MUST always 3631 * be done by the task itself. This works for system wide because 3632 * the tool that is controlling the session is logically doing 3633 * "self-monitoring". 
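 *
 * Sketch of the non-blocking remote restart implemented below; the
 * monitored task then resets its own state on the way back to user mode:
 *
 *	ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET;
 *	PFM_SET_WORK_PENDING(task, 1);
 *	set_notify_resume(task);	// task enters pfm_handle_work()
 *					// before returning to user level
 *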
3634 */ 3635 if (CTX_OVFL_NOBLOCK(ctx) == 0 && state == PFM_CTX_MASKED) { 3636 DPRINT(("unblocking [%d]\n", task_pid_nr(task))); 3637 complete(&ctx->ctx_restart_done); 3638 } else { 3639 DPRINT(("[%d] armed exit trap\n", task_pid_nr(task))); 3640 3641 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_RESET; 3642 3643 PFM_SET_WORK_PENDING(task, 1); 3644 3645 set_notify_resume(task); 3646 3647 /* 3648 * XXX: send reschedule if task runs on another CPU 3649 */ 3650 } 3651 return 0; 3652 } 3653 3654 static int 3655 pfm_debug(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3656 { 3657 unsigned int m = *(unsigned int *)arg; 3658 3659 pfm_sysctl.debug = m == 0 ? 0 : 1; 3660 3661 printk(KERN_INFO "perfmon debugging %s (timing reset)\n", pfm_sysctl.debug ? "on" : "off"); 3662 3663 if (m == 0) { 3664 memset(pfm_stats, 0, sizeof(pfm_stats)); 3665 for(m=0; m < NR_CPUS; m++) pfm_stats[m].pfm_ovfl_intr_cycles_min = ~0UL; 3666 } 3667 return 0; 3668 } 3669 3670 /* 3671 * arg can be NULL and count can be zero for this function 3672 */ 3673 static int 3674 pfm_write_ibr_dbr(int mode, pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3675 { 3676 struct thread_struct *thread = NULL; 3677 struct task_struct *task; 3678 pfarg_dbreg_t *req = (pfarg_dbreg_t *)arg; 3679 unsigned long flags; 3680 dbreg_t dbreg; 3681 unsigned int rnum; 3682 int first_time; 3683 int ret = 0, state; 3684 int i, can_access_pmu = 0; 3685 int is_system, is_loaded; 3686 3687 if (pmu_conf->use_rr_dbregs == 0) return -EINVAL; 3688 3689 state = ctx->ctx_state; 3690 is_loaded = state == PFM_CTX_LOADED ? 1 : 0; 3691 is_system = ctx->ctx_fl_system; 3692 task = ctx->ctx_task; 3693 3694 if (state == PFM_CTX_ZOMBIE) return -EINVAL; 3695 3696 /* 3697 * on both UP and SMP, we can only write to the PMC when the task is 3698 * the owner of the local PMU. 3699 */ 3700 if (is_loaded) { 3701 thread = &task->thread; 3702 /* 3703 * In system wide and when the context is loaded, access can only happen 3704 * when the caller is running on the CPU being monitored by the session. 3705 * It does not have to be the owner (ctx_task) of the context per se. 3706 */ 3707 if (unlikely(is_system && ctx->ctx_cpu != smp_processor_id())) { 3708 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3709 return -EBUSY; 3710 } 3711 can_access_pmu = GET_PMU_OWNER() == task || is_system ? 1 : 0; 3712 } 3713 3714 /* 3715 * we do not need to check for ipsr.db because we do clear ibr.x, dbr.r, and dbr.w 3716 * ensuring that no real breakpoint can be installed via this call. 3717 * 3718 * IMPORTANT: regs can be NULL in this function 3719 */ 3720 3721 first_time = ctx->ctx_fl_using_dbreg == 0; 3722 3723 /* 3724 * don't bother if we are loaded and task is being debugged 3725 */ 3726 if (is_loaded && (thread->flags & IA64_THREAD_DBG_VALID) != 0) { 3727 DPRINT(("debug registers already in use for [%d]\n", task_pid_nr(task))); 3728 return -EBUSY; 3729 } 3730 3731 /* 3732 * check for debug registers in system wide mode 3733 * 3734 * If though a check is done in pfm_context_load(), 3735 * we must repeat it here, in case the registers are 3736 * written after the context is loaded 3737 */ 3738 if (is_loaded) { 3739 LOCK_PFS(flags); 3740 3741 if (first_time && is_system) { 3742 if (pfm_sessions.pfs_ptrace_use_dbregs) 3743 ret = -EBUSY; 3744 else 3745 pfm_sessions.pfs_sys_use_dbregs++; 3746 } 3747 UNLOCK_PFS(flags); 3748 } 3749 3750 if (ret != 0) return ret; 3751 3752 /* 3753 * mark ourself as user of the debug registers for 3754 * perfmon purposes. 
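 *
 * The session counters updated above enforce a simple exclusion rule
 * (see also pfm_use_debug_registers()):
 *
 *	pfs_sys_use_dbregs    > 0  =>  ptrace() cannot claim the debug registers
 *	pfs_ptrace_use_dbregs > 0  =>  a first-time system wide session cannot
 *	                               use code/data range restrictions
 *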
	 */
	ctx->ctx_fl_using_dbreg = 1;

	/*
	 * clear hardware registers to make sure we don't
	 * pick up stale state.
	 *
	 * for a system wide session, we do not use
	 * thread.dbr, thread.ibr because this process
	 * never leaves the current CPU and the state
	 * is shared by all processes running on it
	 */
	if (first_time && can_access_pmu) {
		DPRINT(("[%d] clearing ibrs, dbrs\n", task_pid_nr(task)));
		for (i=0; i < pmu_conf->num_ibrs; i++) {
			ia64_set_ibr(i, 0UL);
			ia64_dv_serialize_instruction();
		}
		ia64_srlz_i();
		for (i=0; i < pmu_conf->num_dbrs; i++) {
			ia64_set_dbr(i, 0UL);
			ia64_dv_serialize_data();
		}
		ia64_srlz_d();
	}

	/*
	 * Now install the values into the registers
	 */
	for (i = 0; i < count; i++, req++) {

		rnum = req->dbreg_num;
		dbreg.val = req->dbreg_value;

		ret = -EINVAL;

		if ((mode == PFM_CODE_RR && rnum >= PFM_NUM_IBRS) || ((mode == PFM_DATA_RR) && rnum >= PFM_NUM_DBRS)) {
			DPRINT(("invalid register %u val=0x%lx mode=%d i=%d count=%d\n",
				rnum, dbreg.val, mode, i, count));

			goto abort_mission;
		}

		/*
		 * make sure we do not install an enabled breakpoint
		 */
		if (rnum & 0x1) {
			if (mode == PFM_CODE_RR)
				dbreg.ibr.ibr_x = 0;
			else
				dbreg.dbr.dbr_r = dbreg.dbr.dbr_w = 0;
		}

		PFM_REG_RETFLAG_SET(req->dbreg_flags, 0);

		/*
		 * Debug registers, just like PMC, can only be modified
		 * by a kernel call. Moreover, perfmon access to these
		 * registers is centralized in this routine. The hardware
		 * does not modify the value of these registers, therefore,
		 * if we save them as they are written, we can avoid having
		 * to save them on context switch out. This is made possible
		 * by the fact that when perfmon uses debug registers, ptrace()
		 * won't be able to modify them concurrently.
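		 *
		 * Illustrative user-level call (a sketch; the PFM_WRITE_IBRS command
		 * name is assumed from <asm/perfmon.h>, the address is made up):
		 *
		 *	pfarg_dbreg_t d;
		 *	memset(&d, 0, sizeof(d));
		 *	d.dbreg_num   = 0;		// even register: address part
		 *	d.dbreg_value = code_start;	// hypothetical code address
		 *	perfmonctl(fd, PFM_WRITE_IBRS, &d, 1);
		 *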
3819 */ 3820 if (mode == PFM_CODE_RR) { 3821 CTX_USED_IBR(ctx, rnum); 3822 3823 if (can_access_pmu) { 3824 ia64_set_ibr(rnum, dbreg.val); 3825 ia64_dv_serialize_instruction(); 3826 } 3827 3828 ctx->ctx_ibrs[rnum] = dbreg.val; 3829 3830 DPRINT(("write ibr%u=0x%lx used_ibrs=0x%x ld=%d apmu=%d\n", 3831 rnum, dbreg.val, ctx->ctx_used_ibrs[0], is_loaded, can_access_pmu)); 3832 } else { 3833 CTX_USED_DBR(ctx, rnum); 3834 3835 if (can_access_pmu) { 3836 ia64_set_dbr(rnum, dbreg.val); 3837 ia64_dv_serialize_data(); 3838 } 3839 ctx->ctx_dbrs[rnum] = dbreg.val; 3840 3841 DPRINT(("write dbr%u=0x%lx used_dbrs=0x%x ld=%d apmu=%d\n", 3842 rnum, dbreg.val, ctx->ctx_used_dbrs[0], is_loaded, can_access_pmu)); 3843 } 3844 } 3845 3846 return 0; 3847 3848 abort_mission: 3849 /* 3850 * in case it was our first attempt, we undo the global modifications 3851 */ 3852 if (first_time) { 3853 LOCK_PFS(flags); 3854 if (ctx->ctx_fl_system) { 3855 pfm_sessions.pfs_sys_use_dbregs--; 3856 } 3857 UNLOCK_PFS(flags); 3858 ctx->ctx_fl_using_dbreg = 0; 3859 } 3860 /* 3861 * install error return flag 3862 */ 3863 PFM_REG_RETFLAG_SET(req->dbreg_flags, PFM_REG_RETFL_EINVAL); 3864 3865 return ret; 3866 } 3867 3868 static int 3869 pfm_write_ibrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3870 { 3871 return pfm_write_ibr_dbr(PFM_CODE_RR, ctx, arg, count, regs); 3872 } 3873 3874 static int 3875 pfm_write_dbrs(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3876 { 3877 return pfm_write_ibr_dbr(PFM_DATA_RR, ctx, arg, count, regs); 3878 } 3879 3880 int 3881 pfm_mod_write_ibrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3882 { 3883 pfm_context_t *ctx; 3884 3885 if (req == NULL) return -EINVAL; 3886 3887 ctx = GET_PMU_CTX(); 3888 3889 if (ctx == NULL) return -EINVAL; 3890 3891 /* 3892 * for now limit to current task, which is enough when calling 3893 * from overflow handler 3894 */ 3895 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3896 3897 return pfm_write_ibrs(ctx, req, nreq, regs); 3898 } 3899 EXPORT_SYMBOL(pfm_mod_write_ibrs); 3900 3901 int 3902 pfm_mod_write_dbrs(struct task_struct *task, void *req, unsigned int nreq, struct pt_regs *regs) 3903 { 3904 pfm_context_t *ctx; 3905 3906 if (req == NULL) return -EINVAL; 3907 3908 ctx = GET_PMU_CTX(); 3909 3910 if (ctx == NULL) return -EINVAL; 3911 3912 /* 3913 * for now limit to current task, which is enough when calling 3914 * from overflow handler 3915 */ 3916 if (task != current && ctx->ctx_fl_system == 0) return -EBUSY; 3917 3918 return pfm_write_dbrs(ctx, req, nreq, regs); 3919 } 3920 EXPORT_SYMBOL(pfm_mod_write_dbrs); 3921 3922 3923 static int 3924 pfm_get_features(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3925 { 3926 pfarg_features_t *req = (pfarg_features_t *)arg; 3927 3928 req->ft_version = PFM_VERSION; 3929 return 0; 3930 } 3931 3932 static int 3933 pfm_stop(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 3934 { 3935 struct pt_regs *tregs; 3936 struct task_struct *task = PFM_CTX_TASK(ctx); 3937 int state, is_system; 3938 3939 state = ctx->ctx_state; 3940 is_system = ctx->ctx_fl_system; 3941 3942 /* 3943 * context must be attached to issue the stop command (includes LOADED,MASKED,ZOMBIE) 3944 */ 3945 if (state == PFM_CTX_UNLOADED) return -EINVAL; 3946 3947 /* 3948 * In system wide and when the context is loaded, access can only happen 3949 * when the caller is running on the CPU being monitored by the session. 
3950 * It does not have to be the owner (ctx_task) of the context per se. 3951 */ 3952 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 3953 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 3954 return -EBUSY; 3955 } 3956 DPRINT(("task [%d] ctx_state=%d is_system=%d\n", 3957 task_pid_nr(PFM_CTX_TASK(ctx)), 3958 state, 3959 is_system)); 3960 /* 3961 * in system mode, we need to update the PMU directly 3962 * and the user level state of the caller, which may not 3963 * necessarily be the creator of the context. 3964 */ 3965 if (is_system) { 3966 /* 3967 * Update local PMU first 3968 * 3969 * disable dcr pp 3970 */ 3971 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) & ~IA64_DCR_PP); 3972 ia64_srlz_i(); 3973 3974 /* 3975 * update local cpuinfo 3976 */ 3977 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 3978 3979 /* 3980 * stop monitoring, does srlz.i 3981 */ 3982 pfm_clear_psr_pp(); 3983 3984 /* 3985 * stop monitoring in the caller 3986 */ 3987 ia64_psr(regs)->pp = 0; 3988 3989 return 0; 3990 } 3991 /* 3992 * per-task mode 3993 */ 3994 3995 if (task == current) { 3996 /* stop monitoring at kernel level */ 3997 pfm_clear_psr_up(); 3998 3999 /* 4000 * stop monitoring at the user level 4001 */ 4002 ia64_psr(regs)->up = 0; 4003 } else { 4004 tregs = task_pt_regs(task); 4005 4006 /* 4007 * stop monitoring at the user level 4008 */ 4009 ia64_psr(tregs)->up = 0; 4010 4011 /* 4012 * monitoring disabled in kernel at next reschedule 4013 */ 4014 ctx->ctx_saved_psr_up = 0; 4015 DPRINT(("task=[%d]\n", task_pid_nr(task))); 4016 } 4017 return 0; 4018 } 4019 4020 4021 static int 4022 pfm_start(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4023 { 4024 struct pt_regs *tregs; 4025 int state, is_system; 4026 4027 state = ctx->ctx_state; 4028 is_system = ctx->ctx_fl_system; 4029 4030 if (state != PFM_CTX_LOADED) return -EINVAL; 4031 4032 /* 4033 * In system wide and when the context is loaded, access can only happen 4034 * when the caller is running on the CPU being monitored by the session. 4035 * It does not have to be the owner (ctx_task) of the context per se. 4036 */ 4037 if (is_system && ctx->ctx_cpu != smp_processor_id()) { 4038 DPRINT(("should be running on CPU%d\n", ctx->ctx_cpu)); 4039 return -EBUSY; 4040 } 4041 4042 /* 4043 * in system mode, we need to update the PMU directly 4044 * and the user level state of the caller, which may not 4045 * necessarily be the creator of the context. 
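 *
 * (Summary of the control bits toggled by pfm_start()/pfm_stop():
 * system-wide sessions use psr.pp together with cr.dcr.pp, per-task
 * sessions use psr.up, either directly for the current task or via
 * ctx_saved_psr_up for a task that is switched out.)
 *
 * Hedged user-level sketch of how a self-monitoring program drives
 * this pair of commands, assuming the PFM_START/PFM_STOP constants
 * from <asm/perfmon.h>; neither command takes an argument:
 *
 *	perfmonctl(ctx_fd, PFM_START, NULL, 0);
 *	... monitored section ...
 *	perfmonctl(ctx_fd, PFM_STOP, NULL, 0);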
4046 */ 4047 if (is_system) { 4048 4049 /* 4050 * set user level psr.pp for the caller 4051 */ 4052 ia64_psr(regs)->pp = 1; 4053 4054 /* 4055 * now update the local PMU and cpuinfo 4056 */ 4057 PFM_CPUINFO_SET(PFM_CPUINFO_DCR_PP); 4058 4059 /* 4060 * start monitoring at kernel level 4061 */ 4062 pfm_set_psr_pp(); 4063 4064 /* enable dcr pp */ 4065 ia64_setreg(_IA64_REG_CR_DCR, ia64_getreg(_IA64_REG_CR_DCR) | IA64_DCR_PP); 4066 ia64_srlz_i(); 4067 4068 return 0; 4069 } 4070 4071 /* 4072 * per-process mode 4073 */ 4074 4075 if (ctx->ctx_task == current) { 4076 4077 /* start monitoring at kernel level */ 4078 pfm_set_psr_up(); 4079 4080 /* 4081 * activate monitoring at user level 4082 */ 4083 ia64_psr(regs)->up = 1; 4084 4085 } else { 4086 tregs = task_pt_regs(ctx->ctx_task); 4087 4088 /* 4089 * start monitoring at the kernel level the next 4090 * time the task is scheduled 4091 */ 4092 ctx->ctx_saved_psr_up = IA64_PSR_UP; 4093 4094 /* 4095 * activate monitoring at user level 4096 */ 4097 ia64_psr(tregs)->up = 1; 4098 } 4099 return 0; 4100 } 4101 4102 static int 4103 pfm_get_pmc_reset(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4104 { 4105 pfarg_reg_t *req = (pfarg_reg_t *)arg; 4106 unsigned int cnum; 4107 int i; 4108 int ret = -EINVAL; 4109 4110 for (i = 0; i < count; i++, req++) { 4111 4112 cnum = req->reg_num; 4113 4114 if (!PMC_IS_IMPL(cnum)) goto abort_mission; 4115 4116 req->reg_value = PMC_DFL_VAL(cnum); 4117 4118 PFM_REG_RETFLAG_SET(req->reg_flags, 0); 4119 4120 DPRINT(("pmc_reset_val pmc[%u]=0x%lx\n", cnum, req->reg_value)); 4121 } 4122 return 0; 4123 4124 abort_mission: 4125 PFM_REG_RETFLAG_SET(req->reg_flags, PFM_REG_RETFL_EINVAL); 4126 return ret; 4127 } 4128 4129 static int 4130 pfm_check_task_exist(pfm_context_t *ctx) 4131 { 4132 struct task_struct *g, *t; 4133 int ret = -ESRCH; 4134 4135 read_lock(&tasklist_lock); 4136 4137 do_each_thread (g, t) { 4138 if (t->thread.pfm_context == ctx) { 4139 ret = 0; 4140 goto out; 4141 } 4142 } while_each_thread (g, t); 4143 out: 4144 read_unlock(&tasklist_lock); 4145 4146 DPRINT(("pfm_check_task_exist: ret=%d ctx=%p\n", ret, ctx)); 4147 4148 return ret; 4149 } 4150 4151 static int 4152 pfm_context_load(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4153 { 4154 struct task_struct *task; 4155 struct thread_struct *thread; 4156 struct pfm_context_t *old; 4157 unsigned long flags; 4158 #ifndef CONFIG_SMP 4159 struct task_struct *owner_task = NULL; 4160 #endif 4161 pfarg_load_t *req = (pfarg_load_t *)arg; 4162 unsigned long *pmcs_source, *pmds_source; 4163 int the_cpu; 4164 int ret = 0; 4165 int state, is_system, set_dbregs = 0; 4166 4167 state = ctx->ctx_state; 4168 is_system = ctx->ctx_fl_system; 4169 /* 4170 * can only load from unloaded or terminated state 4171 */ 4172 if (state != PFM_CTX_UNLOADED) { 4173 DPRINT(("cannot load to [%d], invalid ctx_state=%d\n", 4174 req->load_pid, 4175 ctx->ctx_state)); 4176 return -EBUSY; 4177 } 4178 4179 DPRINT(("load_pid [%d] using_dbreg=%d\n", req->load_pid, ctx->ctx_fl_using_dbreg)); 4180 4181 if (CTX_OVFL_NOBLOCK(ctx) == 0 && req->load_pid == current->pid) { 4182 DPRINT(("cannot use blocking mode on self\n")); 4183 return -EINVAL; 4184 } 4185 4186 ret = pfm_get_task(ctx, req->load_pid, &task); 4187 if (ret) { 4188 DPRINT(("load_pid [%d] get_task=%d\n", req->load_pid, ret)); 4189 return ret; 4190 } 4191 4192 ret = -EINVAL; 4193 4194 /* 4195 * system wide is self monitoring only 4196 */ 4197 if (is_system && task != current) { 4198 DPRINT(("system wide is self 
monitoring only load_pid=%d\n", 4199 req->load_pid)); 4200 goto error; 4201 } 4202 4203 thread = &task->thread; 4204 4205 ret = 0; 4206 /* 4207 * cannot load a context which is using range restrictions 4208 * into a task that is being debugged. 4209 */ 4210 if (ctx->ctx_fl_using_dbreg) { 4211 if (thread->flags & IA64_THREAD_DBG_VALID) { 4212 ret = -EBUSY; 4213 DPRINT(("load_pid [%d] task is debugged, cannot load range restrictions\n", req->load_pid)); 4214 goto error; 4215 } 4216 LOCK_PFS(flags); 4217 4218 if (is_system) { 4219 if (pfm_sessions.pfs_ptrace_use_dbregs) { 4220 DPRINT(("cannot load [%d] dbregs in use\n", 4221 task_pid_nr(task))); 4222 ret = -EBUSY; 4223 } else { 4224 pfm_sessions.pfs_sys_use_dbregs++; 4225 DPRINT(("load [%d] increased sys_use_dbreg=%u\n", task_pid_nr(task), pfm_sessions.pfs_sys_use_dbregs)); 4226 set_dbregs = 1; 4227 } 4228 } 4229 4230 UNLOCK_PFS(flags); 4231 4232 if (ret) goto error; 4233 } 4234 4235 /* 4236 * SMP system-wide monitoring implies self-monitoring. 4237 * 4238 * The programming model expects the task to 4239 * be pinned on a CPU throughout the session. 4240 * Here we take note of the current CPU at the 4241 * time the context is loaded. No call from 4242 * another CPU will be allowed. 4243 * 4244 * The pinning via sched_setaffinity() 4245 * must be done by the calling task prior 4246 * to this call. 4247 * 4248 * systemwide: keep track of CPU this session is supposed to run on 4249 */ 4250 the_cpu = ctx->ctx_cpu = smp_processor_id(); 4251 4252 ret = -EBUSY; 4253 /* 4254 * now reserve the session 4255 */ 4256 ret = pfm_reserve_session(current, is_system, the_cpu); 4257 if (ret) goto error; 4258 4259 /* 4260 * task is necessarily stopped at this point. 4261 * 4262 * If the previous context was zombie, then it got removed in 4263 * pfm_save_regs(). Therefore we should not see it here.
4264 * If we see a context, then this is an active context 4265 * 4266 * XXX: needs to be atomic 4267 */ 4268 DPRINT(("before cmpxchg() old_ctx=%p new_ctx=%p\n", 4269 thread->pfm_context, ctx)); 4270 4271 ret = -EBUSY; 4272 old = ia64_cmpxchg(acq, &thread->pfm_context, NULL, ctx, sizeof(pfm_context_t *)); 4273 if (old != NULL) { 4274 DPRINT(("load_pid [%d] already has a context\n", req->load_pid)); 4275 goto error_unres; 4276 } 4277 4278 pfm_reset_msgq(ctx); 4279 4280 ctx->ctx_state = PFM_CTX_LOADED; 4281 4282 /* 4283 * link context to task 4284 */ 4285 ctx->ctx_task = task; 4286 4287 if (is_system) { 4288 /* 4289 * we load as stopped 4290 */ 4291 PFM_CPUINFO_SET(PFM_CPUINFO_SYST_WIDE); 4292 PFM_CPUINFO_CLEAR(PFM_CPUINFO_DCR_PP); 4293 4294 if (ctx->ctx_fl_excl_idle) PFM_CPUINFO_SET(PFM_CPUINFO_EXCL_IDLE); 4295 } else { 4296 thread->flags |= IA64_THREAD_PM_VALID; 4297 } 4298 4299 /* 4300 * propagate into thread-state 4301 */ 4302 pfm_copy_pmds(task, ctx); 4303 pfm_copy_pmcs(task, ctx); 4304 4305 pmcs_source = ctx->th_pmcs; 4306 pmds_source = ctx->th_pmds; 4307 4308 /* 4309 * always the case for system-wide 4310 */ 4311 if (task == current) { 4312 4313 if (is_system == 0) { 4314 4315 /* allow user level control */ 4316 ia64_psr(regs)->sp = 0; 4317 DPRINT(("clearing psr.sp for [%d]\n", task_pid_nr(task))); 4318 4319 SET_LAST_CPU(ctx, smp_processor_id()); 4320 INC_ACTIVATION(); 4321 SET_ACTIVATION(ctx); 4322 #ifndef CONFIG_SMP 4323 /* 4324 * push the other task out, if any 4325 */ 4326 owner_task = GET_PMU_OWNER(); 4327 if (owner_task) pfm_lazy_save_regs(owner_task); 4328 #endif 4329 } 4330 /* 4331 * load all PMD from ctx to PMU (as opposed to thread state) 4332 * restore all PMC from ctx to PMU 4333 */ 4334 pfm_restore_pmds(pmds_source, ctx->ctx_all_pmds[0]); 4335 pfm_restore_pmcs(pmcs_source, ctx->ctx_all_pmcs[0]); 4336 4337 ctx->ctx_reload_pmcs[0] = 0UL; 4338 ctx->ctx_reload_pmds[0] = 0UL; 4339 4340 /* 4341 * guaranteed safe by earlier check against DBG_VALID 4342 */ 4343 if (ctx->ctx_fl_using_dbreg) { 4344 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 4345 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 4346 } 4347 /* 4348 * set new ownership 4349 */ 4350 SET_PMU_OWNER(task, ctx); 4351 4352 DPRINT(("context loaded on PMU for [%d]\n", task_pid_nr(task))); 4353 } else { 4354 /* 4355 * when not current, task MUST be stopped, so this is safe 4356 */ 4357 regs = task_pt_regs(task); 4358 4359 /* force a full reload */ 4360 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4361 SET_LAST_CPU(ctx, -1); 4362 4363 /* initial saved psr (stopped) */ 4364 ctx->ctx_saved_psr_up = 0UL; 4365 ia64_psr(regs)->up = ia64_psr(regs)->pp = 0; 4366 } 4367 4368 ret = 0; 4369 4370 error_unres: 4371 if (ret) pfm_unreserve_session(ctx, ctx->ctx_fl_system, the_cpu); 4372 error: 4373 /* 4374 * we must undo the dbregs setting (for system-wide) 4375 */ 4376 if (ret && set_dbregs) { 4377 LOCK_PFS(flags); 4378 pfm_sessions.pfs_sys_use_dbregs--; 4379 UNLOCK_PFS(flags); 4380 } 4381 /* 4382 * release task, there is now a link with the context 4383 */ 4384 if (is_system == 0 && task != current) { 4385 pfm_put_task(task); 4386 4387 if (ret == 0) { 4388 ret = pfm_check_task_exist(ctx); 4389 if (ret) { 4390 ctx->ctx_state = PFM_CTX_UNLOADED; 4391 ctx->ctx_task = NULL; 4392 } 4393 } 4394 } 4395 return ret; 4396 } 4397 4398 /* 4399 * in this function, we do not need to increase the use count 4400 * for the task via get_task_struct(), because we hold the 4401 * context lock. 
If the task were to disappear while having 4402 * a context attached, it would go through pfm_exit_thread() 4403 * which also grabs the context lock and would therefore be blocked 4404 * until we are here. 4405 */ 4406 static void pfm_flush_pmds(struct task_struct *, pfm_context_t *ctx); 4407 4408 static int 4409 pfm_context_unload(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs) 4410 { 4411 struct task_struct *task = PFM_CTX_TASK(ctx); 4412 struct pt_regs *tregs; 4413 int prev_state, is_system; 4414 int ret; 4415 4416 DPRINT(("ctx_state=%d task [%d]\n", ctx->ctx_state, task ? task_pid_nr(task) : -1)); 4417 4418 prev_state = ctx->ctx_state; 4419 is_system = ctx->ctx_fl_system; 4420 4421 /* 4422 * unload only when necessary 4423 */ 4424 if (prev_state == PFM_CTX_UNLOADED) { 4425 DPRINT(("ctx_state=%d, nothing to do\n", prev_state)); 4426 return 0; 4427 } 4428 4429 /* 4430 * clear psr and dcr bits 4431 */ 4432 ret = pfm_stop(ctx, NULL, 0, regs); 4433 if (ret) return ret; 4434 4435 ctx->ctx_state = PFM_CTX_UNLOADED; 4436 4437 /* 4438 * in system mode, we need to update the PMU directly 4439 * and the user level state of the caller, which may not 4440 * necessarily be the creator of the context. 4441 */ 4442 if (is_system) { 4443 4444 /* 4445 * Update cpuinfo 4446 * 4447 * local PMU is taken care of in pfm_stop() 4448 */ 4449 PFM_CPUINFO_CLEAR(PFM_CPUINFO_SYST_WIDE); 4450 PFM_CPUINFO_CLEAR(PFM_CPUINFO_EXCL_IDLE); 4451 4452 /* 4453 * save PMDs in context 4454 * release ownership 4455 */ 4456 pfm_flush_pmds(current, ctx); 4457 4458 /* 4459 * at this point we are done with the PMU 4460 * so we can unreserve the resource. 4461 */ 4462 if (prev_state != PFM_CTX_ZOMBIE) 4463 pfm_unreserve_session(ctx, 1 , ctx->ctx_cpu); 4464 4465 /* 4466 * disconnect context from task 4467 */ 4468 task->thread.pfm_context = NULL; 4469 /* 4470 * disconnect task from context 4471 */ 4472 ctx->ctx_task = NULL; 4473 4474 /* 4475 * There is nothing more to cleanup here. 4476 */ 4477 return 0; 4478 } 4479 4480 /* 4481 * per-task mode 4482 */ 4483 tregs = task == current ? regs : task_pt_regs(task); 4484 4485 if (task == current) { 4486 /* 4487 * cancel user level control 4488 */ 4489 ia64_psr(regs)->sp = 1; 4490 4491 DPRINT(("setting psr.sp for [%d]\n", task_pid_nr(task))); 4492 } 4493 /* 4494 * save PMDs to context 4495 * release ownership 4496 */ 4497 pfm_flush_pmds(task, ctx); 4498 4499 /* 4500 * at this point we are done with the PMU 4501 * so we can unreserve the resource. 4502 * 4503 * when state was ZOMBIE, we have already unreserved. 
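 *
 * (For reference, and hedged: user level normally reaches this path
 * with perfmonctl(ctx_fd, PFM_UNLOAD_CONTEXT, NULL, 0), cf. slot 17
 * of pfm_cmd_tab[] below; the same code also runs from
 * pfm_exit_thread() and pfm_context_force_terminate().)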
4504 */ 4505 if (prev_state != PFM_CTX_ZOMBIE) 4506 pfm_unreserve_session(ctx, 0 , ctx->ctx_cpu); 4507 4508 /* 4509 * reset activation counter and psr 4510 */ 4511 ctx->ctx_last_activation = PFM_INVALID_ACTIVATION; 4512 SET_LAST_CPU(ctx, -1); 4513 4514 /* 4515 * PMU state will not be restored 4516 */ 4517 task->thread.flags &= ~IA64_THREAD_PM_VALID; 4518 4519 /* 4520 * break links between context and task 4521 */ 4522 task->thread.pfm_context = NULL; 4523 ctx->ctx_task = NULL; 4524 4525 PFM_SET_WORK_PENDING(task, 0); 4526 4527 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 4528 ctx->ctx_fl_can_restart = 0; 4529 ctx->ctx_fl_going_zombie = 0; 4530 4531 DPRINT(("disconnected [%d] from context\n", task_pid_nr(task))); 4532 4533 return 0; 4534 } 4535 4536 4537 /* 4538 * called only from exit_thread() 4539 * we come here only if the task has a context attached (loaded or masked) 4540 */ 4541 void 4542 pfm_exit_thread(struct task_struct *task) 4543 { 4544 pfm_context_t *ctx; 4545 unsigned long flags; 4546 struct pt_regs *regs = task_pt_regs(task); 4547 int ret, state; 4548 int free_ok = 0; 4549 4550 ctx = PFM_GET_CTX(task); 4551 4552 PROTECT_CTX(ctx, flags); 4553 4554 DPRINT(("state=%d task [%d]\n", ctx->ctx_state, task_pid_nr(task))); 4555 4556 state = ctx->ctx_state; 4557 switch(state) { 4558 case PFM_CTX_UNLOADED: 4559 /* 4560 * only comes to this function if pfm_context is not NULL, i.e., cannot 4561 * be in unloaded state 4562 */ 4563 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] ctx unloaded\n", task_pid_nr(task)); 4564 break; 4565 case PFM_CTX_LOADED: 4566 case PFM_CTX_MASKED: 4567 ret = pfm_context_unload(ctx, NULL, 0, regs); 4568 if (ret) { 4569 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4570 } 4571 DPRINT(("ctx unloaded for current state was %d\n", state)); 4572 4573 pfm_end_notify_user(ctx); 4574 break; 4575 case PFM_CTX_ZOMBIE: 4576 ret = pfm_context_unload(ctx, NULL, 0, regs); 4577 if (ret) { 4578 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] state=%d unload failed %d\n", task_pid_nr(task), state, ret); 4579 } 4580 free_ok = 1; 4581 break; 4582 default: 4583 printk(KERN_ERR "perfmon: pfm_exit_thread [%d] unexpected state=%d\n", task_pid_nr(task), state); 4584 break; 4585 } 4586 UNPROTECT_CTX(ctx, flags); 4587 4588 { u64 psr = pfm_get_psr(); 4589 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 4590 BUG_ON(GET_PMU_OWNER()); 4591 BUG_ON(ia64_psr(regs)->up); 4592 BUG_ON(ia64_psr(regs)->pp); 4593 } 4594 4595 /* 4596 * All memory free operations (especially for vmalloc'ed memory) 4597 * MUST be done with interrupts ENABLED. 
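 * (The context structure itself is kmalloc'ed and could be freed with
 * interrupts disabled, as the ZOMBIE path in pfm_load_regs() notes,
 * but vmalloc'ed sampling buffers are not, hence the blanket rule of
 * freeing only after UNPROTECT_CTX() above.)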
4598 */ 4599 if (free_ok) pfm_context_free(ctx); 4600 } 4601 4602 /* 4603 * functions MUST be listed in the increasing order of their index (see perfmon.h) 4604 */ 4605 #define PFM_CMD(name, flags, arg_count, arg_type, getsz) { name, #name, flags, arg_count, sizeof(arg_type), getsz } 4606 #define PFM_CMD_S(name, flags) { name, #name, flags, 0, 0, NULL } 4607 #define PFM_CMD_PCLRWS (PFM_CMD_FD|PFM_CMD_ARG_RW|PFM_CMD_STOP) 4608 #define PFM_CMD_PCLRW (PFM_CMD_FD|PFM_CMD_ARG_RW) 4609 #define PFM_CMD_NONE { NULL, "no-cmd", 0, 0, 0, NULL} 4610 4611 static pfm_cmd_desc_t pfm_cmd_tab[]={ 4612 /* 0 */PFM_CMD_NONE, 4613 /* 1 */PFM_CMD(pfm_write_pmcs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4614 /* 2 */PFM_CMD(pfm_write_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4615 /* 3 */PFM_CMD(pfm_read_pmds, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4616 /* 4 */PFM_CMD_S(pfm_stop, PFM_CMD_PCLRWS), 4617 /* 5 */PFM_CMD_S(pfm_start, PFM_CMD_PCLRWS), 4618 /* 6 */PFM_CMD_NONE, 4619 /* 7 */PFM_CMD_NONE, 4620 /* 8 */PFM_CMD(pfm_context_create, PFM_CMD_ARG_RW, 1, pfarg_context_t, pfm_ctx_getsize), 4621 /* 9 */PFM_CMD_NONE, 4622 /* 10 */PFM_CMD_S(pfm_restart, PFM_CMD_PCLRW), 4623 /* 11 */PFM_CMD_NONE, 4624 /* 12 */PFM_CMD(pfm_get_features, PFM_CMD_ARG_RW, 1, pfarg_features_t, NULL), 4625 /* 13 */PFM_CMD(pfm_debug, 0, 1, unsigned int, NULL), 4626 /* 14 */PFM_CMD_NONE, 4627 /* 15 */PFM_CMD(pfm_get_pmc_reset, PFM_CMD_ARG_RW, PFM_CMD_ARG_MANY, pfarg_reg_t, NULL), 4628 /* 16 */PFM_CMD(pfm_context_load, PFM_CMD_PCLRWS, 1, pfarg_load_t, NULL), 4629 /* 17 */PFM_CMD_S(pfm_context_unload, PFM_CMD_PCLRWS), 4630 /* 18 */PFM_CMD_NONE, 4631 /* 19 */PFM_CMD_NONE, 4632 /* 20 */PFM_CMD_NONE, 4633 /* 21 */PFM_CMD_NONE, 4634 /* 22 */PFM_CMD_NONE, 4635 /* 23 */PFM_CMD_NONE, 4636 /* 24 */PFM_CMD_NONE, 4637 /* 25 */PFM_CMD_NONE, 4638 /* 26 */PFM_CMD_NONE, 4639 /* 27 */PFM_CMD_NONE, 4640 /* 28 */PFM_CMD_NONE, 4641 /* 29 */PFM_CMD_NONE, 4642 /* 30 */PFM_CMD_NONE, 4643 /* 31 */PFM_CMD_NONE, 4644 /* 32 */PFM_CMD(pfm_write_ibrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL), 4645 /* 33 */PFM_CMD(pfm_write_dbrs, PFM_CMD_PCLRWS, PFM_CMD_ARG_MANY, pfarg_dbreg_t, NULL) 4646 }; 4647 #define PFM_CMD_COUNT (sizeof(pfm_cmd_tab)/sizeof(pfm_cmd_desc_t)) 4648 4649 static int 4650 pfm_check_task_state(pfm_context_t *ctx, int cmd, unsigned long flags) 4651 { 4652 struct task_struct *task; 4653 int state, old_state; 4654 4655 recheck: 4656 state = ctx->ctx_state; 4657 task = ctx->ctx_task; 4658 4659 if (task == NULL) { 4660 DPRINT(("context %d no task, state=%d\n", ctx->ctx_fd, state)); 4661 return 0; 4662 } 4663 4664 DPRINT(("context %d state=%d [%d] task_state=%ld must_stop=%d\n", 4665 ctx->ctx_fd, 4666 state, 4667 task_pid_nr(task), 4668 task->state, PFM_CMD_STOPPED(cmd))); 4669 4670 /* 4671 * self-monitoring always ok. 4672 * 4673 * for system-wide the caller can either be the creator of the 4674 * context (the one to which the context is attached) OR 4675 * a task running on the same CPU as the session.
4676 */ 4677 if (task == current || ctx->ctx_fl_system) return 0; 4678 4679 /* 4680 * we are monitoring another thread 4681 */ 4682 switch(state) { 4683 case PFM_CTX_UNLOADED: 4684 /* 4685 * if context is UNLOADED we are safe to go 4686 */ 4687 return 0; 4688 case PFM_CTX_ZOMBIE: 4689 /* 4690 * no command can operate on a zombie context 4691 */ 4692 DPRINT(("cmd %d state zombie cannot operate on context\n", cmd)); 4693 return -EINVAL; 4694 case PFM_CTX_MASKED: 4695 /* 4696 * PMU state has been saved to software even though 4697 * the thread may still be running. 4698 */ 4699 if (cmd != PFM_UNLOAD_CONTEXT) return 0; 4700 } 4701 4702 /* 4703 * context is LOADED or MASKED. Some commands may need to have 4704 * the task stopped. 4705 * 4706 * We could lift this restriction for UP but it would mean that 4707 * the user has no guarantee the task would not run between 4708 * two successive calls to perfmonctl(). That's probably OK. 4709 * If this user wants to ensure the task does not run, then 4710 * the task must be stopped. 4711 */ 4712 if (PFM_CMD_STOPPED(cmd)) { 4713 if (!task_is_stopped_or_traced(task)) { 4714 DPRINT(("[%d] task not in stopped state\n", task_pid_nr(task))); 4715 return -EBUSY; 4716 } 4717 /* 4718 * task is now stopped, wait for ctxsw out 4719 * 4720 * This is an interesting point in the code. 4721 * We need to unprotect the context because 4722 * the pfm_save_regs() routines needs to grab 4723 * the same lock. There are danger in doing 4724 * this because it leaves a window open for 4725 * another task to get access to the context 4726 * and possibly change its state. The one thing 4727 * that is not possible is for the context to disappear 4728 * because we are protected by the VFS layer, i.e., 4729 * get_fd()/put_fd(). 4730 */ 4731 old_state = state; 4732 4733 UNPROTECT_CTX(ctx, flags); 4734 4735 wait_task_inactive(task, 0); 4736 4737 PROTECT_CTX(ctx, flags); 4738 4739 /* 4740 * we must recheck to verify if state has changed 4741 */ 4742 if (ctx->ctx_state != old_state) { 4743 DPRINT(("old_state=%d new_state=%d\n", old_state, ctx->ctx_state)); 4744 goto recheck; 4745 } 4746 } 4747 return 0; 4748 } 4749 4750 /* 4751 * system-call entry point (must return long) 4752 */ 4753 asmlinkage long 4754 sys_perfmonctl (int fd, int cmd, void __user *arg, int count) 4755 { 4756 struct fd f = {NULL, 0}; 4757 pfm_context_t *ctx = NULL; 4758 unsigned long flags = 0UL; 4759 void *args_k = NULL; 4760 long ret; /* will expand int return types */ 4761 size_t base_sz, sz, xtra_sz = 0; 4762 int narg, completed_args = 0, call_made = 0, cmd_flags; 4763 int (*func)(pfm_context_t *ctx, void *arg, int count, struct pt_regs *regs); 4764 int (*getsize)(void *arg, size_t *sz); 4765 #define PFM_MAX_ARGSIZE 4096 4766 4767 /* 4768 * reject any call if perfmon was disabled at initialization 4769 */ 4770 if (unlikely(pmu_conf == NULL)) return -ENOSYS; 4771 4772 if (unlikely(cmd < 0 || cmd >= PFM_CMD_COUNT)) { 4773 DPRINT(("invalid cmd=%d\n", cmd)); 4774 return -EINVAL; 4775 } 4776 4777 func = pfm_cmd_tab[cmd].cmd_func; 4778 narg = pfm_cmd_tab[cmd].cmd_narg; 4779 base_sz = pfm_cmd_tab[cmd].cmd_argsize; 4780 getsize = pfm_cmd_tab[cmd].cmd_getsize; 4781 cmd_flags = pfm_cmd_tab[cmd].cmd_flags; 4782 4783 if (unlikely(func == NULL)) { 4784 DPRINT(("invalid cmd=%d\n", cmd)); 4785 return -EINVAL; 4786 } 4787 4788 DPRINT(("cmd=%s idx=%d narg=0x%x argsz=%lu count=%d\n", 4789 PFM_CMD_NAME(cmd), 4790 cmd, 4791 narg, 4792 base_sz, 4793 count)); 4794 4795 /* 4796 * check if number of arguments matches what the command 
expects 4797 */ 4798 if (unlikely((narg == PFM_CMD_ARG_MANY && count <= 0) || (narg > 0 && narg != count))) 4799 return -EINVAL; 4800 4801 restart_args: 4802 sz = xtra_sz + base_sz*count; 4803 /* 4804 * limit abuse to min page size 4805 */ 4806 if (unlikely(sz > PFM_MAX_ARGSIZE)) { 4807 printk(KERN_ERR "perfmon: [%d] argument too big %lu\n", task_pid_nr(current), sz); 4808 return -E2BIG; 4809 } 4810 4811 /* 4812 * allocate default-sized argument buffer 4813 */ 4814 if (likely(count && args_k == NULL)) { 4815 args_k = kmalloc(PFM_MAX_ARGSIZE, GFP_KERNEL); 4816 if (args_k == NULL) return -ENOMEM; 4817 } 4818 4819 ret = -EFAULT; 4820 4821 /* 4822 * copy arguments 4823 * 4824 * assume sz = 0 for command without parameters 4825 */ 4826 if (sz && copy_from_user(args_k, arg, sz)) { 4827 DPRINT(("cannot copy_from_user %lu bytes @%p\n", sz, arg)); 4828 goto error_args; 4829 } 4830 4831 /* 4832 * check if command supports extra parameters 4833 */ 4834 if (completed_args == 0 && getsize) { 4835 /* 4836 * get extra parameters size (based on main argument) 4837 */ 4838 ret = (*getsize)(args_k, &xtra_sz); 4839 if (ret) goto error_args; 4840 4841 completed_args = 1; 4842 4843 DPRINT(("restart_args sz=%lu xtra_sz=%lu\n", sz, xtra_sz)); 4844 4845 /* retry if necessary */ 4846 if (likely(xtra_sz)) goto restart_args; 4847 } 4848 4849 if (unlikely((cmd_flags & PFM_CMD_FD) == 0)) goto skip_fd; 4850 4851 ret = -EBADF; 4852 4853 f = fdget(fd); 4854 if (unlikely(f.file == NULL)) { 4855 DPRINT(("invalid fd %d\n", fd)); 4856 goto error_args; 4857 } 4858 if (unlikely(PFM_IS_FILE(f.file) == 0)) { 4859 DPRINT(("fd %d not related to perfmon\n", fd)); 4860 goto error_args; 4861 } 4862 4863 ctx = f.file->private_data; 4864 if (unlikely(ctx == NULL)) { 4865 DPRINT(("no context for fd %d\n", fd)); 4866 goto error_args; 4867 } 4868 prefetch(&ctx->ctx_state); 4869 4870 PROTECT_CTX(ctx, flags); 4871 4872 /* 4873 * check task is stopped 4874 */ 4875 ret = pfm_check_task_state(ctx, cmd, flags); 4876 if (unlikely(ret)) goto abort_locked; 4877 4878 skip_fd: 4879 ret = (*func)(ctx, args_k, count, task_pt_regs(current)); 4880 4881 call_made = 1; 4882 4883 abort_locked: 4884 if (likely(ctx)) { 4885 DPRINT(("context unlocked\n")); 4886 UNPROTECT_CTX(ctx, flags); 4887 } 4888 4889 /* copy argument back to user, if needed */ 4890 if (call_made && PFM_CMD_RW_ARG(cmd) && copy_to_user(arg, args_k, base_sz*count)) ret = -EFAULT; 4891 4892 error_args: 4893 if (f.file) 4894 fdput(f); 4895 4896 kfree(args_k); 4897 4898 DPRINT(("cmd=%s ret=%ld\n", PFM_CMD_NAME(cmd), ret)); 4899 4900 return ret; 4901 } 4902 4903 static void 4904 pfm_resume_after_ovfl(pfm_context_t *ctx, unsigned long ovfl_regs, struct pt_regs *regs) 4905 { 4906 pfm_buffer_fmt_t *fmt = ctx->ctx_buf_fmt; 4907 pfm_ovfl_ctrl_t rst_ctrl; 4908 int state; 4909 int ret = 0; 4910 4911 state = ctx->ctx_state; 4912 /* 4913 * Unlock sampling buffer and reset index atomically 4914 * XXX: not really needed when blocking 4915 */ 4916 if (CTX_HAS_SMPL(ctx)) { 4917 4918 rst_ctrl.bits.mask_monitoring = 0; 4919 rst_ctrl.bits.reset_ovfl_pmds = 0; 4920 4921 if (state == PFM_CTX_LOADED) 4922 ret = pfm_buf_fmt_restart_active(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4923 else 4924 ret = pfm_buf_fmt_restart(fmt, current, &rst_ctrl, ctx->ctx_smpl_hdr, regs); 4925 } else { 4926 rst_ctrl.bits.mask_monitoring = 0; 4927 rst_ctrl.bits.reset_ovfl_pmds = 1; 4928 } 4929 4930 if (ret == 0) { 4931 if (rst_ctrl.bits.reset_ovfl_pmds) { 4932 pfm_reset_regs(ctx, &ovfl_regs, PFM_PMD_LONG_RESET); 4933 } 4934 
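/*
 * rst_ctrl was either filled in by the sampling format's restart
 * callback above or set to the default policy (reset the overflowed
 * PMDs, do not keep monitoring masked). A format that wants the
 * default behavior would do the equivalent of the following in its
 * restart handler (hedged sketch, field names as used above):
 *
 *	ctrl->bits.mask_monitoring = 0;
 *	ctrl->bits.reset_ovfl_pmds = 1;
 */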
if (rst_ctrl.bits.mask_monitoring == 0) { 4935 DPRINT(("resuming monitoring\n")); 4936 if (ctx->ctx_state == PFM_CTX_MASKED) pfm_restore_monitoring(current); 4937 } else { 4938 DPRINT(("stopping monitoring\n")); 4939 //pfm_stop_monitoring(current, regs); 4940 } 4941 ctx->ctx_state = PFM_CTX_LOADED; 4942 } 4943 } 4944 4945 /* 4946 * context MUST BE LOCKED when calling 4947 * can only be called for current 4948 */ 4949 static void 4950 pfm_context_force_terminate(pfm_context_t *ctx, struct pt_regs *regs) 4951 { 4952 int ret; 4953 4954 DPRINT(("entering for [%d]\n", task_pid_nr(current))); 4955 4956 ret = pfm_context_unload(ctx, NULL, 0, regs); 4957 if (ret) { 4958 printk(KERN_ERR "pfm_context_force_terminate: [%d] unloaded failed with %d\n", task_pid_nr(current), ret); 4959 } 4960 4961 /* 4962 * and wakeup controlling task, indicating we are now disconnected 4963 */ 4964 wake_up_interruptible(&ctx->ctx_zombieq); 4965 4966 /* 4967 * given that context is still locked, the controlling 4968 * task will only get access when we return from 4969 * pfm_handle_work(). 4970 */ 4971 } 4972 4973 static int pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds); 4974 4975 /* 4976 * pfm_handle_work() can be called with interrupts enabled 4977 * (TIF_NEED_RESCHED) or disabled. The down_interruptible 4978 * call may sleep, therefore we must re-enable interrupts 4979 * to avoid deadlocks. It is safe to do so because this function 4980 * is called ONLY when returning to user level (pUStk=1), in which case 4981 * there is no risk of kernel stack overflow due to deep 4982 * interrupt nesting. 4983 */ 4984 void 4985 pfm_handle_work(void) 4986 { 4987 pfm_context_t *ctx; 4988 struct pt_regs *regs; 4989 unsigned long flags, dummy_flags; 4990 unsigned long ovfl_regs; 4991 unsigned int reason; 4992 int ret; 4993 4994 ctx = PFM_GET_CTX(current); 4995 if (ctx == NULL) { 4996 printk(KERN_ERR "perfmon: [%d] has no PFM context\n", 4997 task_pid_nr(current)); 4998 return; 4999 } 5000 5001 PROTECT_CTX(ctx, flags); 5002 5003 PFM_SET_WORK_PENDING(current, 0); 5004 5005 regs = task_pt_regs(current); 5006 5007 /* 5008 * extract reason for being here and clear 5009 */ 5010 reason = ctx->ctx_fl_trap_reason; 5011 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_NONE; 5012 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5013 5014 DPRINT(("reason=%d state=%d\n", reason, ctx->ctx_state)); 5015 5016 /* 5017 * must be done before we check for simple-reset mode 5018 */ 5019 if (ctx->ctx_fl_going_zombie || ctx->ctx_state == PFM_CTX_ZOMBIE) 5020 goto do_zombie; 5021 5022 //if (CTX_OVFL_NOBLOCK(ctx)) goto skip_blocking; 5023 if (reason == PFM_TRAP_REASON_RESET) 5024 goto skip_blocking; 5025 5026 /* 5027 * restore interrupt mask to what it was on entry. 5028 * Could be enabled/diasbled. 5029 */ 5030 UNPROTECT_CTX(ctx, flags); 5031 5032 /* 5033 * force interrupt enable because of down_interruptible() 5034 */ 5035 local_irq_enable(); 5036 5037 DPRINT(("before block sleeping\n")); 5038 5039 /* 5040 * may go through without blocking on SMP systems 5041 * if restart has been received already by the time we call down() 5042 */ 5043 ret = wait_for_completion_interruptible(&ctx->ctx_restart_done); 5044 5045 DPRINT(("after block sleeping ret=%d\n", ret)); 5046 5047 /* 5048 * lock context and mask interrupts again 5049 * We save flags into a dummy because we may have 5050 * altered interrupts mask compared to entry in this 5051 * function. 
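 *
 * (How we get woken up, for reference and hedged: the controlling
 * process reads the PFM_MSG_OVFL message from the context file
 * descriptor and then issues the restart command, which completes
 * ctx_restart_done. User-level sketch, assuming the usual constants:
 *
 *	pfm_msg_t msg;
 *	read(ctx_fd, &msg, sizeof(msg));
 *	perfmonctl(ctx_fd, PFM_RESTART, NULL, 0);
 *
 * PFM_RESTART takes no argument, cf. slot 10 of pfm_cmd_tab[] above.)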
5052 */ 5053 PROTECT_CTX(ctx, dummy_flags); 5054 5055 /* 5056 * we need to read the ovfl_regs only after wake-up 5057 * because we may have had pfm_write_pmds() in between 5058 * and that can changed PMD values and therefore 5059 * ovfl_regs is reset for these new PMD values. 5060 */ 5061 ovfl_regs = ctx->ctx_ovfl_regs[0]; 5062 5063 if (ctx->ctx_fl_going_zombie) { 5064 do_zombie: 5065 DPRINT(("context is zombie, bailing out\n")); 5066 pfm_context_force_terminate(ctx, regs); 5067 goto nothing_to_do; 5068 } 5069 /* 5070 * in case of interruption of down() we don't restart anything 5071 */ 5072 if (ret < 0) 5073 goto nothing_to_do; 5074 5075 skip_blocking: 5076 pfm_resume_after_ovfl(ctx, ovfl_regs, regs); 5077 ctx->ctx_ovfl_regs[0] = 0UL; 5078 5079 nothing_to_do: 5080 /* 5081 * restore flags as they were upon entry 5082 */ 5083 UNPROTECT_CTX(ctx, flags); 5084 } 5085 5086 static int 5087 pfm_notify_user(pfm_context_t *ctx, pfm_msg_t *msg) 5088 { 5089 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5090 DPRINT(("ignoring overflow notification, owner is zombie\n")); 5091 return 0; 5092 } 5093 5094 DPRINT(("waking up somebody\n")); 5095 5096 if (msg) wake_up_interruptible(&ctx->ctx_msgq_wait); 5097 5098 /* 5099 * safe, we are not in intr handler, nor in ctxsw when 5100 * we come here 5101 */ 5102 kill_fasync (&ctx->ctx_async_queue, SIGIO, POLL_IN); 5103 5104 return 0; 5105 } 5106 5107 static int 5108 pfm_ovfl_notify_user(pfm_context_t *ctx, unsigned long ovfl_pmds) 5109 { 5110 pfm_msg_t *msg = NULL; 5111 5112 if (ctx->ctx_fl_no_msg == 0) { 5113 msg = pfm_get_new_msg(ctx); 5114 if (msg == NULL) { 5115 printk(KERN_ERR "perfmon: pfm_ovfl_notify_user no more notification msgs\n"); 5116 return -1; 5117 } 5118 5119 msg->pfm_ovfl_msg.msg_type = PFM_MSG_OVFL; 5120 msg->pfm_ovfl_msg.msg_ctx_fd = ctx->ctx_fd; 5121 msg->pfm_ovfl_msg.msg_active_set = 0; 5122 msg->pfm_ovfl_msg.msg_ovfl_pmds[0] = ovfl_pmds; 5123 msg->pfm_ovfl_msg.msg_ovfl_pmds[1] = 0UL; 5124 msg->pfm_ovfl_msg.msg_ovfl_pmds[2] = 0UL; 5125 msg->pfm_ovfl_msg.msg_ovfl_pmds[3] = 0UL; 5126 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5127 } 5128 5129 DPRINT(("ovfl msg: msg=%p no_msg=%d fd=%d ovfl_pmds=0x%lx\n", 5130 msg, 5131 ctx->ctx_fl_no_msg, 5132 ctx->ctx_fd, 5133 ovfl_pmds)); 5134 5135 return pfm_notify_user(ctx, msg); 5136 } 5137 5138 static int 5139 pfm_end_notify_user(pfm_context_t *ctx) 5140 { 5141 pfm_msg_t *msg; 5142 5143 msg = pfm_get_new_msg(ctx); 5144 if (msg == NULL) { 5145 printk(KERN_ERR "perfmon: pfm_end_notify_user no more notification msgs\n"); 5146 return -1; 5147 } 5148 /* no leak */ 5149 memset(msg, 0, sizeof(*msg)); 5150 5151 msg->pfm_end_msg.msg_type = PFM_MSG_END; 5152 msg->pfm_end_msg.msg_ctx_fd = ctx->ctx_fd; 5153 msg->pfm_ovfl_msg.msg_tstamp = 0UL; 5154 5155 DPRINT(("end msg: msg=%p no_msg=%d ctx_fd=%d\n", 5156 msg, 5157 ctx->ctx_fl_no_msg, 5158 ctx->ctx_fd)); 5159 5160 return pfm_notify_user(ctx, msg); 5161 } 5162 5163 /* 5164 * main overflow processing routine. 
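 * The handler virtualizes the hardware counters to 64 bits: counters
 * are only ovfl_val wide in hardware (pmu_conf->ovfl_val is the mask
 * of implemented bits), so each hardware overflow adds ovfl_val + 1
 * to the 64-bit software value kept in ctx_pmds[].val. Worked example,
 * assuming a hypothetical 47-bit counter: ovfl_val = 2^47 - 1, so every
 * overflow accounts for 2^47 qualified events; the residual count left
 * in the hardware register is folded in later by pfm_read_pmds().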
5165 * it can be called from the interrupt path or explicitly during the context switch code 5166 */ 5167 static void pfm_overflow_handler(struct task_struct *task, pfm_context_t *ctx, 5168 unsigned long pmc0, struct pt_regs *regs) 5169 { 5170 pfm_ovfl_arg_t *ovfl_arg; 5171 unsigned long mask; 5172 unsigned long old_val, ovfl_val, new_val; 5173 unsigned long ovfl_notify = 0UL, ovfl_pmds = 0UL, smpl_pmds = 0UL, reset_pmds; 5174 unsigned long tstamp; 5175 pfm_ovfl_ctrl_t ovfl_ctrl; 5176 unsigned int i, has_smpl; 5177 int must_notify = 0; 5178 5179 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) goto stop_monitoring; 5180 5181 /* 5182 * sanity test. Should never happen 5183 */ 5184 if (unlikely((pmc0 & 0x1) == 0)) goto sanity_check; 5185 5186 tstamp = ia64_get_itc(); 5187 mask = pmc0 >> PMU_FIRST_COUNTER; 5188 ovfl_val = pmu_conf->ovfl_val; 5189 has_smpl = CTX_HAS_SMPL(ctx); 5190 5191 DPRINT_ovfl(("pmc0=0x%lx pid=%d iip=0x%lx, %s " 5192 "used_pmds=0x%lx\n", 5193 pmc0, 5194 task ? task_pid_nr(task): -1, 5195 (regs ? regs->cr_iip : 0), 5196 CTX_OVFL_NOBLOCK(ctx) ? "nonblocking" : "blocking", 5197 ctx->ctx_used_pmds[0])); 5198 5199 5200 /* 5201 * first we update the virtual counters 5202 * assume there was a prior ia64_srlz_d() issued 5203 */ 5204 for (i = PMU_FIRST_COUNTER; mask ; i++, mask >>= 1) { 5205 5206 /* skip pmd which did not overflow */ 5207 if ((mask & 0x1) == 0) continue; 5208 5209 /* 5210 * Note that the pmd is not necessarily 0 at this point as qualified events 5211 * may have happened before the PMU was frozen. The residual count is not 5212 * taken into consideration here but will be with any read of the pmd via 5213 * pfm_read_pmds(). 5214 */ 5215 old_val = new_val = ctx->ctx_pmds[i].val; 5216 new_val += 1 + ovfl_val; 5217 ctx->ctx_pmds[i].val = new_val; 5218 5219 /* 5220 * check for overflow condition 5221 */ 5222 if (likely(old_val > new_val)) { 5223 ovfl_pmds |= 1UL << i; 5224 if (PMC_OVFL_NOTIFY(ctx, i)) ovfl_notify |= 1UL << i; 5225 } 5226 5227 DPRINT_ovfl(("ctx_pmd[%d].val=0x%lx old_val=0x%lx pmd=0x%lx ovfl_pmds=0x%lx ovfl_notify=0x%lx\n", 5228 i, 5229 new_val, 5230 old_val, 5231 ia64_get_pmd(i) & ovfl_val, 5232 ovfl_pmds, 5233 ovfl_notify)); 5234 } 5235 5236 /* 5237 * there was no 64-bit overflow, nothing else to do 5238 */ 5239 if (ovfl_pmds == 0UL) return; 5240 5241 /* 5242 * reset all control bits 5243 */ 5244 ovfl_ctrl.val = 0; 5245 reset_pmds = 0UL; 5246 5247 /* 5248 * if a sampling format module exists, then we "cache" the overflow by 5249 * calling the module's handler() routine. 5250 */ 5251 if (has_smpl) { 5252 unsigned long start_cycles, end_cycles; 5253 unsigned long pmd_mask; 5254 int j, k, ret = 0; 5255 int this_cpu = smp_processor_id(); 5256 5257 pmd_mask = ovfl_pmds >> PMU_FIRST_COUNTER; 5258 ovfl_arg = &ctx->ctx_ovfl_arg; 5259 5260 prefetch(ctx->ctx_smpl_hdr); 5261 5262 for(i=PMU_FIRST_COUNTER; pmd_mask && ret == 0; i++, pmd_mask >>=1) { 5263 5264 mask = 1UL << i; 5265 5266 if ((pmd_mask & 0x1) == 0) continue; 5267 5268 ovfl_arg->ovfl_pmd = (unsigned char )i; 5269 ovfl_arg->ovfl_notify = ovfl_notify & mask ? 1 : 0; 5270 ovfl_arg->active_set = 0; 5271 ovfl_arg->ovfl_ctrl.val = 0; /* module must fill in all fields */ 5272 ovfl_arg->smpl_pmds[0] = smpl_pmds = ctx->ctx_pmds[i].smpl_pmds[0]; 5273 5274 ovfl_arg->pmd_value = ctx->ctx_pmds[i].val; 5275 ovfl_arg->pmd_last_reset = ctx->ctx_pmds[i].lval; 5276 ovfl_arg->pmd_eventid = ctx->ctx_pmds[i].eventid; 5277 5278 /* 5279 * copy values of pmds of interest. 
Sampling format may copy them 5280 * into sampling buffer. 5281 */ 5282 if (smpl_pmds) { 5283 for(j=0, k=0; smpl_pmds; j++, smpl_pmds >>=1) { 5284 if ((smpl_pmds & 0x1) == 0) continue; 5285 ovfl_arg->smpl_pmds_values[k++] = PMD_IS_COUNTING(j) ? pfm_read_soft_counter(ctx, j) : ia64_get_pmd(j); 5286 DPRINT_ovfl(("smpl_pmd[%d]=pmd%u=0x%lx\n", k-1, j, ovfl_arg->smpl_pmds_values[k-1])); 5287 } 5288 } 5289 5290 pfm_stats[this_cpu].pfm_smpl_handler_calls++; 5291 5292 start_cycles = ia64_get_itc(); 5293 5294 /* 5295 * call custom buffer format record (handler) routine 5296 */ 5297 ret = (*ctx->ctx_buf_fmt->fmt_handler)(task, ctx->ctx_smpl_hdr, ovfl_arg, regs, tstamp); 5298 5299 end_cycles = ia64_get_itc(); 5300 5301 /* 5302 * For those controls, we take the union because they have 5303 * an all or nothing behavior. 5304 */ 5305 ovfl_ctrl.bits.notify_user |= ovfl_arg->ovfl_ctrl.bits.notify_user; 5306 ovfl_ctrl.bits.block_task |= ovfl_arg->ovfl_ctrl.bits.block_task; 5307 ovfl_ctrl.bits.mask_monitoring |= ovfl_arg->ovfl_ctrl.bits.mask_monitoring; 5308 /* 5309 * build the bitmask of pmds to reset now 5310 */ 5311 if (ovfl_arg->ovfl_ctrl.bits.reset_ovfl_pmds) reset_pmds |= mask; 5312 5313 pfm_stats[this_cpu].pfm_smpl_handler_cycles += end_cycles - start_cycles; 5314 } 5315 /* 5316 * when the module cannot handle the rest of the overflows, we abort right here 5317 */ 5318 if (ret && pmd_mask) { 5319 DPRINT(("handler aborts leftover ovfl_pmds=0x%lx\n", 5320 pmd_mask<<PMU_FIRST_COUNTER)); 5321 } 5322 /* 5323 * remove the pmds we reset now from the set of pmds to reset in pfm_restart() 5324 */ 5325 ovfl_pmds &= ~reset_pmds; 5326 } else { 5327 /* 5328 * when no sampling module is used, then the default 5329 * is to notify on overflow if requested by user 5330 */ 5331 ovfl_ctrl.bits.notify_user = ovfl_notify ? 1 : 0; 5332 ovfl_ctrl.bits.block_task = ovfl_notify ? 1 : 0; 5333 ovfl_ctrl.bits.mask_monitoring = ovfl_notify ? 1 : 0; /* XXX: change for saturation */ 5334 ovfl_ctrl.bits.reset_ovfl_pmds = ovfl_notify ? 0 : 1; 5335 /* 5336 * if needed, we reset all overflowed pmds 5337 */ 5338 if (ovfl_notify == 0) reset_pmds = ovfl_pmds; 5339 } 5340 5341 DPRINT_ovfl(("ovfl_pmds=0x%lx reset_pmds=0x%lx\n", ovfl_pmds, reset_pmds)); 5342 5343 /* 5344 * reset the requested PMD registers using the short reset values 5345 */ 5346 if (reset_pmds) { 5347 unsigned long bm = reset_pmds; 5348 pfm_reset_regs(ctx, &bm, PFM_PMD_SHORT_RESET); 5349 } 5350 5351 if (ovfl_notify && ovfl_ctrl.bits.notify_user) { 5352 /* 5353 * keep track of what to reset when unblocking 5354 */ 5355 ctx->ctx_ovfl_regs[0] = ovfl_pmds; 5356 5357 /* 5358 * check for blocking context 5359 */ 5360 if (CTX_OVFL_NOBLOCK(ctx) == 0 && ovfl_ctrl.bits.block_task) { 5361 5362 ctx->ctx_fl_trap_reason = PFM_TRAP_REASON_BLOCK; 5363 5364 /* 5365 * set the perfmon specific checking pending work for the task 5366 */ 5367 PFM_SET_WORK_PENDING(task, 1); 5368 5369 /* 5370 * when coming from ctxsw, current still points to the 5371 * previous task, therefore we must work with task and not current. 5372 */ 5373 set_notify_resume(task); 5374 } 5375 /* 5376 * defer until state is changed (shorten spin window). the context is locked 5377 * anyway, so the signal receiver would come spin for nothing. 5378 */ 5379 must_notify = 1; 5380 } 5381 5382 DPRINT_ovfl(("owner [%d] pending=%ld reason=%u ovfl_pmds=0x%lx ovfl_notify=0x%lx masked=%d\n", 5383 GET_PMU_OWNER() ? 
task_pid_nr(GET_PMU_OWNER()) : -1, 5384 PFM_GET_WORK_PENDING(task), 5385 ctx->ctx_fl_trap_reason, 5386 ovfl_pmds, 5387 ovfl_notify, 5388 ovfl_ctrl.bits.mask_monitoring ? 1 : 0)); 5389 /* 5390 * in case monitoring must be stopped, we toggle the psr bits 5391 */ 5392 if (ovfl_ctrl.bits.mask_monitoring) { 5393 pfm_mask_monitoring(task); 5394 ctx->ctx_state = PFM_CTX_MASKED; 5395 ctx->ctx_fl_can_restart = 1; 5396 } 5397 5398 /* 5399 * send notification now 5400 */ 5401 if (must_notify) pfm_ovfl_notify_user(ctx, ovfl_notify); 5402 5403 return; 5404 5405 sanity_check: 5406 printk(KERN_ERR "perfmon: CPU%d overflow handler [%d] pmc0=0x%lx\n", 5407 smp_processor_id(), 5408 task ? task_pid_nr(task) : -1, 5409 pmc0); 5410 return; 5411 5412 stop_monitoring: 5413 /* 5414 * in SMP, zombie context is never restored but reclaimed in pfm_load_regs(). 5415 * Moreover, zombies are also reclaimed in pfm_save_regs(). Therefore we can 5416 * come here as zombie only if the task is the current task. In which case, we 5417 * can access the PMU hardware directly. 5418 * 5419 * Note that zombies do have PM_VALID set. So here we do the minimal. 5420 * 5421 * In case the context was zombified it could not be reclaimed at the time 5422 * the monitoring program exited. At this point, the PMU reservation has been 5423 * returned, the sampling buffer has been freed. We must convert this call 5424 * into a spurious interrupt. However, we must also avoid infinite overflows 5425 * by stopping monitoring for this task. We can only come here for a per-task 5426 * context. All we need to do is to stop monitoring using the psr bits which 5427 * are always task private. By re-enabling secure monitoring, we ensure that 5428 * the monitored task will not be able to re-activate monitoring. 5429 * The task will eventually be context switched out, at which point the context 5430 * will be reclaimed (that includes releasing ownership of the PMU). 5431 * 5432 * So there might be a window of time where the number of per-task sessions is zero 5433 * yet one PMU might have an owner and get at most one overflow interrupt for a zombie 5434 * context. This is safe because if a per-task session comes in, it will push this one 5435 * out and by virtue of pfm_save_regs(), this one will disappear. If a system wide 5436 * session is forced on that CPU, given that we use task pinning, pfm_save_regs() will 5437 * also push our zombie context out. 5438 * 5439 * Overall pretty hairy stuff.... 5440 */ 5441 DPRINT(("ctx is zombie for [%d], converted to spurious\n", task ?
task_pid_nr(task): -1)); 5442 pfm_clear_psr_up(); 5443 ia64_psr(regs)->up = 0; 5444 ia64_psr(regs)->sp = 1; 5445 return; 5446 } 5447 5448 static int 5449 pfm_do_interrupt_handler(void *arg, struct pt_regs *regs) 5450 { 5451 struct task_struct *task; 5452 pfm_context_t *ctx; 5453 unsigned long flags; 5454 u64 pmc0; 5455 int this_cpu = smp_processor_id(); 5456 int retval = 0; 5457 5458 pfm_stats[this_cpu].pfm_ovfl_intr_count++; 5459 5460 /* 5461 * srlz.d done before arriving here 5462 */ 5463 pmc0 = ia64_get_pmc(0); 5464 5465 task = GET_PMU_OWNER(); 5466 ctx = GET_PMU_CTX(); 5467 5468 /* 5469 * if we have some pending bits set 5470 * assumes : if any PMC0.bit[63-1] is set, then PMC0.fr = 1 5471 */ 5472 if (PMC0_HAS_OVFL(pmc0) && task) { 5473 /* 5474 * we assume that pmc0.fr is always set here 5475 */ 5476 5477 /* sanity check */ 5478 if (!ctx) goto report_spurious1; 5479 5480 if (ctx->ctx_fl_system == 0 && (task->thread.flags & IA64_THREAD_PM_VALID) == 0) 5481 goto report_spurious2; 5482 5483 PROTECT_CTX_NOPRINT(ctx, flags); 5484 5485 pfm_overflow_handler(task, ctx, pmc0, regs); 5486 5487 UNPROTECT_CTX_NOPRINT(ctx, flags); 5488 5489 } else { 5490 pfm_stats[this_cpu].pfm_spurious_ovfl_intr_count++; 5491 retval = -1; 5492 } 5493 /* 5494 * keep it unfrozen at all times 5495 */ 5496 pfm_unfreeze_pmu(); 5497 5498 return retval; 5499 5500 report_spurious1: 5501 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d has no PFM context\n", 5502 this_cpu, task_pid_nr(task)); 5503 pfm_unfreeze_pmu(); 5504 return -1; 5505 report_spurious2: 5506 printk(KERN_INFO "perfmon: spurious overflow interrupt on CPU%d: process %d, invalid flag\n", 5507 this_cpu, 5508 task_pid_nr(task)); 5509 pfm_unfreeze_pmu(); 5510 return -1; 5511 } 5512 5513 static irqreturn_t 5514 pfm_interrupt_handler(int irq, void *arg) 5515 { 5516 unsigned long start_cycles, total_cycles; 5517 unsigned long min, max; 5518 int this_cpu; 5519 int ret; 5520 struct pt_regs *regs = get_irq_regs(); 5521 5522 this_cpu = get_cpu(); 5523 if (likely(!pfm_alt_intr_handler)) { 5524 min = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min; 5525 max = pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max; 5526 5527 start_cycles = ia64_get_itc(); 5528 5529 ret = pfm_do_interrupt_handler(arg, regs); 5530 5531 total_cycles = ia64_get_itc(); 5532 5533 /* 5534 * don't measure spurious interrupts 5535 */ 5536 if (likely(ret == 0)) { 5537 total_cycles -= start_cycles; 5538 5539 if (total_cycles < min) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_min = total_cycles; 5540 if (total_cycles > max) pfm_stats[this_cpu].pfm_ovfl_intr_cycles_max = total_cycles; 5541 5542 pfm_stats[this_cpu].pfm_ovfl_intr_cycles += total_cycles; 5543 } 5544 } 5545 else { 5546 (*pfm_alt_intr_handler->handler)(irq, arg, regs); 5547 } 5548 5549 put_cpu(); 5550 return IRQ_HANDLED; 5551 } 5552 5553 /* 5554 * /proc/perfmon interface, for debug only 5555 */ 5556 5557 #define PFM_PROC_SHOW_HEADER ((void *)(long)nr_cpu_ids+1) 5558 5559 static void * 5560 pfm_proc_start(struct seq_file *m, loff_t *pos) 5561 { 5562 if (*pos == 0) { 5563 return PFM_PROC_SHOW_HEADER; 5564 } 5565 5566 while (*pos <= nr_cpu_ids) { 5567 if (cpu_online(*pos - 1)) { 5568 return (void *)*pos; 5569 } 5570 ++*pos; 5571 } 5572 return NULL; 5573 } 5574 5575 static void * 5576 pfm_proc_next(struct seq_file *m, void *v, loff_t *pos) 5577 { 5578 ++*pos; 5579 return pfm_proc_start(m, pos); 5580 } 5581 5582 static void 5583 pfm_proc_stop(struct seq_file *m, void *v) 5584 { 5585 } 5586 5587 static void 5588 
pfm_proc_show_header(struct seq_file *m) 5589 { 5590 struct list_head * pos; 5591 pfm_buffer_fmt_t * entry; 5592 unsigned long flags; 5593 5594 seq_printf(m, 5595 "perfmon version : %u.%u\n" 5596 "model : %s\n" 5597 "fastctxsw : %s\n" 5598 "expert mode : %s\n" 5599 "ovfl_mask : 0x%lx\n" 5600 "PMU flags : 0x%x\n", 5601 PFM_VERSION_MAJ, PFM_VERSION_MIN, 5602 pmu_conf->pmu_name, 5603 pfm_sysctl.fastctxsw > 0 ? "Yes": "No", 5604 pfm_sysctl.expert_mode > 0 ? "Yes": "No", 5605 pmu_conf->ovfl_val, 5606 pmu_conf->flags); 5607 5608 LOCK_PFS(flags); 5609 5610 seq_printf(m, 5611 "proc_sessions : %u\n" 5612 "sys_sessions : %u\n" 5613 "sys_use_dbregs : %u\n" 5614 "ptrace_use_dbregs : %u\n", 5615 pfm_sessions.pfs_task_sessions, 5616 pfm_sessions.pfs_sys_sessions, 5617 pfm_sessions.pfs_sys_use_dbregs, 5618 pfm_sessions.pfs_ptrace_use_dbregs); 5619 5620 UNLOCK_PFS(flags); 5621 5622 spin_lock(&pfm_buffer_fmt_lock); 5623 5624 list_for_each(pos, &pfm_buffer_fmt_list) { 5625 entry = list_entry(pos, pfm_buffer_fmt_t, fmt_list); 5626 seq_printf(m, "format : %16phD %s\n", 5627 entry->fmt_uuid, entry->fmt_name); 5628 } 5629 spin_unlock(&pfm_buffer_fmt_lock); 5630 5631 } 5632 5633 static int 5634 pfm_proc_show(struct seq_file *m, void *v) 5635 { 5636 unsigned long psr; 5637 unsigned int i; 5638 int cpu; 5639 5640 if (v == PFM_PROC_SHOW_HEADER) { 5641 pfm_proc_show_header(m); 5642 return 0; 5643 } 5644 5645 /* show info for CPU (v - 1) */ 5646 5647 cpu = (long)v - 1; 5648 seq_printf(m, 5649 "CPU%-2d overflow intrs : %lu\n" 5650 "CPU%-2d overflow cycles : %lu\n" 5651 "CPU%-2d overflow min : %lu\n" 5652 "CPU%-2d overflow max : %lu\n" 5653 "CPU%-2d smpl handler calls : %lu\n" 5654 "CPU%-2d smpl handler cycles : %lu\n" 5655 "CPU%-2d spurious intrs : %lu\n" 5656 "CPU%-2d replay intrs : %lu\n" 5657 "CPU%-2d syst_wide : %d\n" 5658 "CPU%-2d dcr_pp : %d\n" 5659 "CPU%-2d exclude idle : %d\n" 5660 "CPU%-2d owner : %d\n" 5661 "CPU%-2d context : %p\n" 5662 "CPU%-2d activations : %lu\n", 5663 cpu, pfm_stats[cpu].pfm_ovfl_intr_count, 5664 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles, 5665 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_min, 5666 cpu, pfm_stats[cpu].pfm_ovfl_intr_cycles_max, 5667 cpu, pfm_stats[cpu].pfm_smpl_handler_calls, 5668 cpu, pfm_stats[cpu].pfm_smpl_handler_cycles, 5669 cpu, pfm_stats[cpu].pfm_spurious_ovfl_intr_count, 5670 cpu, pfm_stats[cpu].pfm_replay_ovfl_intr_count, 5671 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_SYST_WIDE ? 1 : 0, 5672 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_DCR_PP ? 1 : 0, 5673 cpu, pfm_get_cpu_data(pfm_syst_info, cpu) & PFM_CPUINFO_EXCL_IDLE ? 1 : 0, 5674 cpu, pfm_get_cpu_data(pmu_owner, cpu) ? 
pfm_get_cpu_data(pmu_owner, cpu)->pid: -1, 5675 cpu, pfm_get_cpu_data(pmu_ctx, cpu), 5676 cpu, pfm_get_cpu_data(pmu_activation_number, cpu)); 5677 5678 if (num_online_cpus() == 1 && pfm_sysctl.debug > 0) { 5679 5680 psr = pfm_get_psr(); 5681 5682 ia64_srlz_d(); 5683 5684 seq_printf(m, 5685 "CPU%-2d psr : 0x%lx\n" 5686 "CPU%-2d pmc0 : 0x%lx\n", 5687 cpu, psr, 5688 cpu, ia64_get_pmc(0)); 5689 5690 for (i=0; PMC_IS_LAST(i) == 0; i++) { 5691 if (PMC_IS_COUNTING(i) == 0) continue; 5692 seq_printf(m, 5693 "CPU%-2d pmc%u : 0x%lx\n" 5694 "CPU%-2d pmd%u : 0x%lx\n", 5695 cpu, i, ia64_get_pmc(i), 5696 cpu, i, ia64_get_pmd(i)); 5697 } 5698 } 5699 return 0; 5700 } 5701 5702 const struct seq_operations pfm_seq_ops = { 5703 .start = pfm_proc_start, 5704 .next = pfm_proc_next, 5705 .stop = pfm_proc_stop, 5706 .show = pfm_proc_show 5707 }; 5708 5709 /* 5710 * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens 5711 * during pfm_enable() hence before pfm_start(). We cannot assume monitoring 5712 * is active or inactive based on mode. We must rely on the value in 5713 * local_cpu_data->pfm_syst_info 5714 */ 5715 void 5716 pfm_syst_wide_update_task(struct task_struct *task, unsigned long info, int is_ctxswin) 5717 { 5718 struct pt_regs *regs; 5719 unsigned long dcr; 5720 unsigned long dcr_pp; 5721 5722 dcr_pp = info & PFM_CPUINFO_DCR_PP ? 1 : 0; 5723 5724 /* 5725 * pid 0 is guaranteed to be the idle task. There is one such task with pid 0 5726 * on every CPU, so we can rely on the pid to identify the idle task. 5727 */ 5728 if ((info & PFM_CPUINFO_EXCL_IDLE) == 0 || task->pid) { 5729 regs = task_pt_regs(task); 5730 ia64_psr(regs)->pp = is_ctxswin ? dcr_pp : 0; 5731 return; 5732 } 5733 /* 5734 * if monitoring has started 5735 */ 5736 if (dcr_pp) { 5737 dcr = ia64_getreg(_IA64_REG_CR_DCR); 5738 /* 5739 * context switching in? 5740 */ 5741 if (is_ctxswin) { 5742 /* mask monitoring for the idle task */ 5743 ia64_setreg(_IA64_REG_CR_DCR, dcr & ~IA64_DCR_PP); 5744 pfm_clear_psr_pp(); 5745 ia64_srlz_i(); 5746 return; 5747 } 5748 /* 5749 * context switching out 5750 * restore monitoring for next task 5751 * 5752 * Due to inlining this odd if-then-else construction generates 5753 * better code. 5754 */ 5755 ia64_setreg(_IA64_REG_CR_DCR, dcr |IA64_DCR_PP); 5756 pfm_set_psr_pp(); 5757 ia64_srlz_i(); 5758 } 5759 } 5760 5761 #ifdef CONFIG_SMP 5762 5763 static void 5764 pfm_force_cleanup(pfm_context_t *ctx, struct pt_regs *regs) 5765 { 5766 struct task_struct *task = ctx->ctx_task; 5767 5768 ia64_psr(regs)->up = 0; 5769 ia64_psr(regs)->sp = 1; 5770 5771 if (GET_PMU_OWNER() == task) { 5772 DPRINT(("cleared ownership for [%d]\n", 5773 task_pid_nr(ctx->ctx_task))); 5774 SET_PMU_OWNER(NULL, NULL); 5775 } 5776 5777 /* 5778 * disconnect the task from the context and vice-versa 5779 */ 5780 PFM_SET_WORK_PENDING(task, 0); 5781 5782 task->thread.pfm_context = NULL; 5783 task->thread.flags &= ~IA64_THREAD_PM_VALID; 5784 5785 DPRINT(("force cleanup for [%d]\n", task_pid_nr(task))); 5786 } 5787 5788 5789 /* 5790 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5791 */ 5792 void 5793 pfm_save_regs(struct task_struct *task) 5794 { 5795 pfm_context_t *ctx; 5796 unsigned long flags; 5797 u64 psr; 5798 5799 5800 ctx = PFM_GET_CTX(task); 5801 if (ctx == NULL) return; 5802 5803 /* 5804 * we always come here with interrupts ALREADY disabled by 5805 * the scheduler. So we simply need to protect against concurrent 5806 * access, not CPU concurrency. 
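 *
 * On SMP the PMD state is saved eagerly on every switch out (see the
 * pfm_save_pmds() call below) because the task may be rescheduled on a
 * different CPU; the UP build instead defers this to
 * pfm_lazy_save_regs(), which runs only when another context needs the
 * PMU.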
5807 */ 5808 flags = pfm_protect_ctx_ctxsw(ctx); 5809 5810 if (ctx->ctx_state == PFM_CTX_ZOMBIE) { 5811 struct pt_regs *regs = task_pt_regs(task); 5812 5813 pfm_clear_psr_up(); 5814 5815 pfm_force_cleanup(ctx, regs); 5816 5817 BUG_ON(ctx->ctx_smpl_hdr); 5818 5819 pfm_unprotect_ctx_ctxsw(ctx, flags); 5820 5821 pfm_context_free(ctx); 5822 return; 5823 } 5824 5825 /* 5826 * save current PSR: needed because we modify it 5827 */ 5828 ia64_srlz_d(); 5829 psr = pfm_get_psr(); 5830 5831 BUG_ON(psr & (IA64_PSR_I)); 5832 5833 /* 5834 * stop monitoring: 5835 * This is the last instruction which may generate an overflow 5836 * 5837 * We do not need to set psr.sp because, it is irrelevant in kernel. 5838 * It will be restored from ipsr when going back to user level 5839 */ 5840 pfm_clear_psr_up(); 5841 5842 /* 5843 * keep a copy of psr.up (for reload) 5844 */ 5845 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5846 5847 /* 5848 * release ownership of this PMU. 5849 * PM interrupts are masked, so nothing 5850 * can happen. 5851 */ 5852 SET_PMU_OWNER(NULL, NULL); 5853 5854 /* 5855 * we systematically save the PMD as we have no 5856 * guarantee we will be schedule at that same 5857 * CPU again. 5858 */ 5859 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5860 5861 /* 5862 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5863 * we will need it on the restore path to check 5864 * for pending overflow. 5865 */ 5866 ctx->th_pmcs[0] = ia64_get_pmc(0); 5867 5868 /* 5869 * unfreeze PMU if had pending overflows 5870 */ 5871 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5872 5873 /* 5874 * finally, allow context access. 5875 * interrupts will still be masked after this call. 5876 */ 5877 pfm_unprotect_ctx_ctxsw(ctx, flags); 5878 } 5879 5880 #else /* !CONFIG_SMP */ 5881 void 5882 pfm_save_regs(struct task_struct *task) 5883 { 5884 pfm_context_t *ctx; 5885 u64 psr; 5886 5887 ctx = PFM_GET_CTX(task); 5888 if (ctx == NULL) return; 5889 5890 /* 5891 * save current PSR: needed because we modify it 5892 */ 5893 psr = pfm_get_psr(); 5894 5895 BUG_ON(psr & (IA64_PSR_I)); 5896 5897 /* 5898 * stop monitoring: 5899 * This is the last instruction which may generate an overflow 5900 * 5901 * We do not need to set psr.sp because, it is irrelevant in kernel. 5902 * It will be restored from ipsr when going back to user level 5903 */ 5904 pfm_clear_psr_up(); 5905 5906 /* 5907 * keep a copy of psr.up (for reload) 5908 */ 5909 ctx->ctx_saved_psr_up = psr & IA64_PSR_UP; 5910 } 5911 5912 static void 5913 pfm_lazy_save_regs (struct task_struct *task) 5914 { 5915 pfm_context_t *ctx; 5916 unsigned long flags; 5917 5918 { u64 psr = pfm_get_psr(); 5919 BUG_ON(psr & IA64_PSR_UP); 5920 } 5921 5922 ctx = PFM_GET_CTX(task); 5923 5924 /* 5925 * we need to mask PMU overflow here to 5926 * make sure that we maintain pmc0 until 5927 * we save it. overflow interrupts are 5928 * treated as spurious if there is no 5929 * owner. 5930 * 5931 * XXX: I don't think this is necessary 5932 */ 5933 PROTECT_CTX(ctx,flags); 5934 5935 /* 5936 * release ownership of this PMU. 5937 * must be done before we save the registers. 5938 * 5939 * after this call any PMU interrupt is treated 5940 * as spurious. 
5941 */ 5942 SET_PMU_OWNER(NULL, NULL); 5943 5944 /* 5945 * save all the pmds we use 5946 */ 5947 pfm_save_pmds(ctx->th_pmds, ctx->ctx_used_pmds[0]); 5948 5949 /* 5950 * save pmc0 ia64_srlz_d() done in pfm_save_pmds() 5951 * it is needed to check for pending overflow 5952 * on the restore path 5953 */ 5954 ctx->th_pmcs[0] = ia64_get_pmc(0); 5955 5956 /* 5957 * unfreeze PMU if had pending overflows 5958 */ 5959 if (ctx->th_pmcs[0] & ~0x1UL) pfm_unfreeze_pmu(); 5960 5961 /* 5962 * now we can unmask PMU interrupts, they will 5963 * be treated as purely spurious and we will not 5964 * lose any information 5965 */ 5966 UNPROTECT_CTX(ctx,flags); 5967 } 5968 #endif /* CONFIG_SMP */ 5969 5970 #ifdef CONFIG_SMP 5971 /* 5972 * in 2.6, interrupts are masked when we come here and the runqueue lock is held 5973 */ 5974 void 5975 pfm_load_regs (struct task_struct *task) 5976 { 5977 pfm_context_t *ctx; 5978 unsigned long pmc_mask = 0UL, pmd_mask = 0UL; 5979 unsigned long flags; 5980 u64 psr, psr_up; 5981 int need_irq_resend; 5982 5983 ctx = PFM_GET_CTX(task); 5984 if (unlikely(ctx == NULL)) return; 5985 5986 BUG_ON(GET_PMU_OWNER()); 5987 5988 /* 5989 * possible on unload 5990 */ 5991 if (unlikely((task->thread.flags & IA64_THREAD_PM_VALID) == 0)) return; 5992 5993 /* 5994 * we always come here with interrupts ALREADY disabled by 5995 * the scheduler. So we simply need to protect against concurrent 5996 * access, not CPU concurrency. 5997 */ 5998 flags = pfm_protect_ctx_ctxsw(ctx); 5999 psr = pfm_get_psr(); 6000 6001 need_irq_resend = pmu_conf->flags & PFM_PMU_IRQ_RESEND; 6002 6003 BUG_ON(psr & (IA64_PSR_UP|IA64_PSR_PP)); 6004 BUG_ON(psr & IA64_PSR_I); 6005 6006 if (unlikely(ctx->ctx_state == PFM_CTX_ZOMBIE)) { 6007 struct pt_regs *regs = task_pt_regs(task); 6008 6009 BUG_ON(ctx->ctx_smpl_hdr); 6010 6011 pfm_force_cleanup(ctx, regs); 6012 6013 pfm_unprotect_ctx_ctxsw(ctx, flags); 6014 6015 /* 6016 * this one (kmalloc'ed) is fine with interrupts disabled 6017 */ 6018 pfm_context_free(ctx); 6019 6020 return; 6021 } 6022 6023 /* 6024 * we restore ALL the debug registers to avoid picking up 6025 * stale state. 6026 */ 6027 if (ctx->ctx_fl_using_dbreg) { 6028 pfm_restore_ibrs(ctx->ctx_ibrs, pmu_conf->num_ibrs); 6029 pfm_restore_dbrs(ctx->ctx_dbrs, pmu_conf->num_dbrs); 6030 } 6031 /* 6032 * retrieve saved psr.up 6033 */ 6034 psr_up = ctx->ctx_saved_psr_up; 6035 6036 /* 6037 * if we were the last user of the PMU on that CPU, 6038 * then nothing to do except restore psr 6039 */ 6040 if (GET_LAST_CPU(ctx) == smp_processor_id() && ctx->ctx_last_activation == GET_ACTIVATION()) { 6041 6042 /* 6043 * retrieve partial reload masks (due to user modifications) 6044 */ 6045 pmc_mask = ctx->ctx_reload_pmcs[0]; 6046 pmd_mask = ctx->ctx_reload_pmds[0]; 6047 6048 } else { 6049 /* 6050 * To avoid leaking information to the user level when psr.sp=0, 6051 * we must reload ALL implemented pmds (even the ones we don't use). 6052 * In the kernel we only allow PFM_READ_PMDS on registers which 6053 * we initialized or requested (sampling) so there is no risk there. 6054 */ 6055 pmd_mask = pfm_sysctl.fastctxsw ? ctx->ctx_used_pmds[0] : ctx->ctx_all_pmds[0]; 6056 6057 /* 6058 * ALL accessible PMCs are systematically reloaded, unused registers 6059 * get their default (from pfm_reset_pmu_state()) values to avoid picking 6060 * up stale configuration. 6061 * 6062 * PMC0 is never in the mask. It is always restored separately.
6063 */ 6064 pmc_mask = ctx->ctx_all_pmcs[0]; 6065 } 6066 /* 6067 * when context is MASKED, we will restore PMC with plm=0 6068 * and PMD with stale information, but that's ok, nothing 6069 * will be captured. 6070 * 6071 * XXX: optimize here 6072 */ 6073 if (pmd_mask) pfm_restore_pmds(ctx->th_pmds, pmd_mask); 6074 if (pmc_mask) pfm_restore_pmcs(ctx->th_pmcs, pmc_mask); 6075 607