Linux/arch/x86/kernel/cpu/mcheck/mce_intel.c

/*
 * Intel specific MCE features.
 * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca>
 * Copyright (C) 2008, 2009 Intel Corporation
 * Author: Andi Kleen
 */

#include <linux/gfp.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/cpumask.h>
#include <asm/apic.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>

#include "mce-internal.h"

/*
 * Support for Intel Corrected Machine Check Interrupts (CMCI). This allows
 * the CPU to raise an interrupt when a corrected machine check happens.
 * Normally those are picked up by a regular polling timer.
 * Also supports reliable discovery of shared banks.
 */

/*
 * CMCI can be delivered to multiple cpus that share a machine check bank
 * so we need to designate a single cpu to process errors logged in each bank
 * in the interrupt handler (otherwise we would have many races and potential
 * double reporting of the same error).
 * Note that this can change when a cpu is offlined or brought online since
 * some MCA banks are shared across cpus. When a cpu is offlined, cmci_clear()
 * disables CMCI on all banks owned by the cpu and clears this bitfield. At
 * this point, cmci_rediscover() kicks in and a different cpu may end up
 * taking ownership of some of the shared MCA banks that were previously
 * owned by the offlined cpu.
 */
static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);

/*
 * CMCI storm detection backoff counter
 *
 * During a storm, this counter is reset to INITIAL_CHECK_INTERVAL whenever an
 * error was logged in the last poll; otherwise it is decremented by one. The
 * end of the CMCI storm is signalled when it reaches 0.
 */
static DEFINE_PER_CPU(int, cmci_backoff_cnt);

/*
 * cmci_discover_lock protects against parallel discovery attempts
 * which could race against each other.
 */
static DEFINE_RAW_SPINLOCK(cmci_discover_lock);

#define CMCI_THRESHOLD          1
#define CMCI_POLL_INTERVAL      (30 * HZ)
#define CMCI_STORM_INTERVAL     (HZ)
#define CMCI_STORM_THRESHOLD    15

static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
static DEFINE_PER_CPU(unsigned int, cmci_storm_state);

enum {
        CMCI_STORM_NONE,
        CMCI_STORM_ACTIVE,
        CMCI_STORM_SUBSIDED,
};

static atomic_t cmci_storm_on_cpus;

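/*
 * Check whether this CPU supports CMCI and it has not been disabled on
 * the command line. On success *banks is set to the number of MCA banks
 * (capped at MAX_NR_BANKS); the return value reflects the MCG_CMCI_P
 * capability bit in MCG_CAP.
 */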
static int cmci_supported(int *banks)
{
        u64 cap;

        if (mca_cfg.cmci_disabled || mca_cfg.ignore_ce)
                return 0;

        /*
         * Vendor check is not strictly needed, but the early initialization
         * is vendor keyed and this makes sure this path is not entered on
         * other vendors' CPUs.
         */
        if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
                return 0;
        if (!cpu_has_apic || lapic_get_maxlvt() < 6)
                return 0;
        rdmsrl(MSR_IA32_MCG_CAP, cap);
        *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff);
        return !!(cap & MCG_CMCI_P);
}

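/*
 * Check whether local machine check (LMCE) delivery is supported: the CPU
 * must advertise both MCG_SER_P and MCG_LMCE_P in MCG_CAP, and the BIOS
 * must have opted in via the locked IA32_FEATURE_CONTROL MSR.
 */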
static bool lmce_supported(void)
{
        u64 tmp;

        if (mca_cfg.lmce_disabled)
                return false;

        rdmsrl(MSR_IA32_MCG_CAP, tmp);

        /*
         * LMCE depends on recovery support in the processor. Hence both
         * MCG_SER_P and MCG_LMCE_P should be present in MCG_CAP.
         */
        if ((tmp & (MCG_SER_P | MCG_LMCE_P)) !=
                   (MCG_SER_P | MCG_LMCE_P))
                return false;

        /*
         * BIOS should indicate support for LMCE by setting bit 20 in
         * IA32_FEATURE_CONTROL without which touching MCG_EXT_CTL will
         * generate a #GP fault.
         */
        rdmsrl(MSR_IA32_FEATURE_CONTROL, tmp);
        if ((tmp & (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE)) ==
                   (FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_LMCE))
                return true;

        return false;
}

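/*
 * Poll the MCA banks owned by this CPU while a CMCI storm is in progress
 * and maintain the storm backoff counter. Returns false when no storm is
 * active so the caller falls back to the regular poll path.
 */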
bool mce_intel_cmci_poll(void)
{
        if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
                return false;

        /*
         * Reset the counter if we've logged an error in the last poll
         * during the storm.
         */
        if (machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned)))
                this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);
        else
                this_cpu_dec(cmci_backoff_cnt);

        return true;
}

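/*
 * Reset the CMCI storm state of a CPU (used from the CPU hotplug path):
 * if the CPU was counted as part of an active storm, drop it from the
 * global storm count.
 */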
void mce_intel_hcpu_update(unsigned long cpu)
{
        if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
                atomic_dec(&cmci_storm_on_cpus);

        per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
}

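/*
 * Enable or disable the CMCI interrupt (MCI_CTL2_CMCI_EN) on every MCA
 * bank owned by this CPU.
 */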
static void cmci_toggle_interrupt_mode(bool on)
{
        unsigned long flags, *owned;
        int bank;
        u64 val;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        owned = this_cpu_ptr(mce_banks_owned);
        for_each_set_bit(bank, owned, MAX_NR_BANKS) {
                rdmsrl(MSR_IA32_MCx_CTL2(bank), val);

                if (on)
                        val |= MCI_CTL2_CMCI_EN;
                else
                        val &= ~MCI_CTL2_CMCI_EN;

                wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

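/*
 * Pick the next interval for the MCE poll timer. While a CMCI storm is in
 * progress, keep polling at CMCI_STORM_INTERVAL; once every CPU has left
 * the storm state, re-enable CMCI interrupts and return to normal operation.
 */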
unsigned long cmci_intel_adjust_timer(unsigned long interval)
{
        if ((this_cpu_read(cmci_backoff_cnt) > 0) &&
            (__this_cpu_read(cmci_storm_state) == CMCI_STORM_ACTIVE)) {
                mce_notify_irq();
                return CMCI_STORM_INTERVAL;
        }

        switch (__this_cpu_read(cmci_storm_state)) {
        case CMCI_STORM_ACTIVE:

                /*
                 * We switch back to interrupt mode once the poll timer has
                 * silenced itself. That means no events recorded and the timer
                 * interval is back to our poll interval.
                 */
                __this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
                if (!atomic_sub_return(1, &cmci_storm_on_cpus))
                        pr_notice("CMCI storm subsided: switching to interrupt mode\n");

                /* FALLTHROUGH */

        case CMCI_STORM_SUBSIDED:
                /*
                 * We wait for all CPUs to go back to SUBSIDED state. When that
                 * happens we switch back to interrupt mode.
                 */
                if (!atomic_read(&cmci_storm_on_cpus)) {
                        __this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
                        cmci_toggle_interrupt_mode(true);
                        cmci_recheck();
                }
                return CMCI_POLL_INTERVAL;
        default:

                /* We have shiny weather. Let the poll do whatever it thinks. */
                return interval;
        }
}

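/*
 * Count CMCI interrupts per CMCI_STORM_INTERVAL window. When more than
 * CMCI_STORM_THRESHOLD interrupts arrive in one window, disable CMCI on
 * the banks owned by this CPU and switch to poll mode. Returns true if a
 * storm is in progress (including one detected on an earlier interrupt).
 */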
static bool cmci_storm_detect(void)
{
        unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
        unsigned long ts = __this_cpu_read(cmci_time_stamp);
        unsigned long now = jiffies;
        int r;

        if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
                return true;

        if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
                cnt++;
        } else {
                cnt = 1;
                __this_cpu_write(cmci_time_stamp, now);
        }
        __this_cpu_write(cmci_storm_cnt, cnt);

        if (cnt <= CMCI_STORM_THRESHOLD)
                return false;

        cmci_toggle_interrupt_mode(false);
        __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
        r = atomic_add_return(1, &cmci_storm_on_cpus);
        mce_timer_kick(CMCI_STORM_INTERVAL);
        this_cpu_write(cmci_backoff_cnt, INITIAL_CHECK_INTERVAL);

        if (r == 1)
                pr_notice("CMCI storm detected: switching to poll mode\n");
        return true;
}

/*
 * The CMCI interrupt handler. This is called on every event.
 * It just calls the poller directly to log any events.
 * This could in theory increase the threshold under high load,
 * but doesn't for now.
 */
static void intel_threshold_interrupt(void)
{
        if (cmci_storm_detect())
                return;

        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
        mce_notify_irq();
}

/*
 * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
 * on this CPU. Use the algorithm recommended in the SDM to discover shared
 * banks.
 */
static void cmci_discover(int banks)
{
        unsigned long *owned = (void *)this_cpu_ptr(&mce_banks_owned);
        unsigned long flags;
        int i;
        int bios_wrong_thresh = 0;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++) {
                u64 val;
                int bios_zero_thresh = 0;

                if (test_bit(i, owned))
                        continue;

                /* Skip banks in firmware first mode */
                if (test_bit(i, mce_banks_ce_disabled))
                        continue;

                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Already owned by someone else? */
                if (val & MCI_CTL2_CMCI_EN) {
                        clear_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        continue;
                }

                if (!mca_cfg.bios_cmci_threshold) {
                        val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
                        val |= CMCI_THRESHOLD;
                } else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
                        /*
                         * If the bios_cmci_threshold boot option was specified
                         * but the threshold is zero, try to initialize it to 1.
                         */
                        bios_zero_thresh = 1;
                        val |= CMCI_THRESHOLD;
                }

                val |= MCI_CTL2_CMCI_EN;
                wrmsrl(MSR_IA32_MCx_CTL2(i), val);
                rdmsrl(MSR_IA32_MCx_CTL2(i), val);

                /* Did the enable bit stick? -- the bank supports CMCI */
                if (val & MCI_CTL2_CMCI_EN) {
                        set_bit(i, owned);
                        __clear_bit(i, this_cpu_ptr(mce_poll_banks));
                        /*
                         * We were able to set a threshold for a bank that had
                         * a threshold of 0. This means the BIOS has not set
                         * the thresholds properly or does not work with this
                         * boot option. Note it now and report later.
                         */
                        if (mca_cfg.bios_cmci_threshold && bios_zero_thresh &&
                                        (val & MCI_CTL2_CMCI_THRESHOLD_MASK))
                                bios_wrong_thresh = 1;
                } else {
                        WARN_ON(!test_bit(i, this_cpu_ptr(mce_poll_banks)));
                }
        }
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
        if (mca_cfg.bios_cmci_threshold && bios_wrong_thresh) {
                pr_info_once(
                        "bios_cmci_threshold: Some banks do not have valid thresholds set\n");
                pr_info_once(
                        "bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
        }
}

/*
 * Just in case we missed an event during initialization, check
 * all the CMCI owned banks.
 */
void cmci_recheck(void)
{
        unsigned long flags;
        int banks;

        if (!mce_available(raw_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
                return;

        local_irq_save(flags);
        machine_check_poll(MCP_TIMESTAMP, this_cpu_ptr(&mce_banks_owned));
        local_irq_restore(flags);
}

/* Caller must hold cmci_discover_lock */
static void __cmci_disable_bank(int bank)
{
        u64 val;

        if (!test_bit(bank, this_cpu_ptr(mce_banks_owned)))
                return;
        rdmsrl(MSR_IA32_MCx_CTL2(bank), val);
        val &= ~MCI_CTL2_CMCI_EN;
        wrmsrl(MSR_IA32_MCx_CTL2(bank), val);
        __clear_bit(bank, this_cpu_ptr(mce_banks_owned));
}

/*
 * Disable CMCI on this CPU for all banks it owns when it goes down.
 * This allows other CPUs to claim the banks on rediscovery.
 */
void cmci_clear(void)
{
        unsigned long flags;
        int i;
        int banks;

        if (!cmci_supported(&banks))
                return;
        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        for (i = 0; i < banks; i++)
                __cmci_disable_bank(i);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void cmci_rediscover_work_func(void *arg)
{
        int banks;

        /* Recheck banks in case CPUs don't all have the same number */
        if (cmci_supported(&banks))
                cmci_discover(banks);
}

/* After a CPU went down, cycle through all the others and rediscover */
void cmci_rediscover(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        on_each_cpu(cmci_rediscover_work_func, NULL, 1);
}

/*
 * Re-enable CMCI on this CPU in case a CPU-down operation failed.
 */
void cmci_reenable(void)
{
        int banks;

        if (cmci_supported(&banks))
                cmci_discover(banks);
}

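/*
 * Disable CMCI for a single bank on this CPU and release ownership of it.
 */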
void cmci_disable_bank(int bank)
{
        int banks;
        unsigned long flags;

        if (!cmci_supported(&banks))
                return;

        raw_spin_lock_irqsave(&cmci_discover_lock, flags);
        __cmci_disable_bank(bank);
        raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
}

static void intel_init_cmci(void)
{
        int banks;

        if (!cmci_supported(&banks))
                return;

        mce_threshold_vector = intel_threshold_interrupt;
        cmci_discover(banks);
        /*
         * For CPU #0 this runs while the APIC is still disabled, but that's
         * ok because only the vector is set up. We still do another check
         * of the banks later for CPU #0 just to make sure we don't miss
         * any events.
         */
        apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED);
        cmci_recheck();
}

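/*
 * Enable local machine check (LMCE) delivery by setting MCG_EXT_CTL_LMCE_EN,
 * provided the CPU and the BIOS support it.
 */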
void intel_init_lmce(void)
{
        u64 val;

        if (!lmce_supported())
                return;

        rdmsrl(MSR_IA32_MCG_EXT_CTL, val);

        if (!(val & MCG_EXT_CTL_LMCE_EN))
                wrmsrl(MSR_IA32_MCG_EXT_CTL, val | MCG_EXT_CTL_LMCE_EN);
}

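/* Set up all Intel specific MCE features for this CPU. */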
void mce_intel_feature_init(struct cpuinfo_x86 *c)
{
        intel_init_thermal(c);
        intel_init_cmci();
        intel_init_lmce();
}
