TOMOYO Linux Cross Reference
Linux/arch/x86/kvm/svm.c

  1 /*
  2  * Kernel-based Virtual Machine driver for Linux
  3  *
  4  * AMD SVM support
  5  *
  6  * Copyright (C) 2006 Qumranet, Inc.
  7  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  8  *
  9  * Authors:
 10  *   Yaniv Kamay  <yaniv@qumranet.com>
 11  *   Avi Kivity   <avi@qumranet.com>
 12  *
 13  * This work is licensed under the terms of the GNU GPL, version 2.  See
 14  * the COPYING file in the top-level directory.
 15  *
 16  */
 17 #include <linux/kvm_host.h>
 18 
 19 #include "irq.h"
 20 #include "mmu.h"
 21 #include "kvm_cache_regs.h"
 22 #include "x86.h"
 23 
 24 #include <linux/module.h>
 25 #include <linux/kernel.h>
 26 #include <linux/vmalloc.h>
 27 #include <linux/highmem.h>
 28 #include <linux/sched.h>
 29 #include <linux/ftrace_event.h>
 30 #include <linux/slab.h>
 31 
 32 #include <asm/perf_event.h>
 33 #include <asm/tlbflush.h>
 34 #include <asm/desc.h>
 35 #include <asm/kvm_para.h>
 36 #include <asm/nospec-branch.h>
 37 
 38 #include <asm/virtext.h>
 39 #include "trace.h"
 40 
 41 #define __ex(x) __kvm_handle_fault_on_reboot(x)
 42 
 43 MODULE_AUTHOR("Qumranet");
 44 MODULE_LICENSE("GPL");
 45 
 46 #define IOPM_ALLOC_ORDER 2
 47 #define MSRPM_ALLOC_ORDER 1
 48 
 49 #define SEG_TYPE_LDT 2
 50 #define SEG_TYPE_BUSY_TSS16 3
 51 
 52 #define SVM_FEATURE_NPT            (1 <<  0)
 53 #define SVM_FEATURE_LBRV           (1 <<  1)
 54 #define SVM_FEATURE_SVML           (1 <<  2)
 55 #define SVM_FEATURE_NRIP           (1 <<  3)
 56 #define SVM_FEATURE_TSC_RATE       (1 <<  4)
 57 #define SVM_FEATURE_VMCB_CLEAN     (1 <<  5)
 58 #define SVM_FEATURE_FLUSH_ASID     (1 <<  6)
 59 #define SVM_FEATURE_DECODE_ASSIST  (1 <<  7)
 60 #define SVM_FEATURE_PAUSE_FILTER   (1 << 10)
 61 
 62 #define NESTED_EXIT_HOST        0       /* Exit handled on host level */
 63 #define NESTED_EXIT_DONE        1       /* Exit caused nested vmexit  */
 64 #define NESTED_EXIT_CONTINUE    2       /* Further checks needed      */
 65 
 66 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 67 
 68 #define TSC_RATIO_RSVD          0xffffff0000000000ULL
 69 #define TSC_RATIO_MIN           0x0000000000000001ULL
 70 #define TSC_RATIO_MAX           0x000000ffffffffffULL
 71 
 72 static bool erratum_383_found __read_mostly;
 73 
 74 static const u32 host_save_user_msrs[] = {
 75 #ifdef CONFIG_X86_64
 76         MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
 77         MSR_FS_BASE,
 78 #endif
 79         MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
 80 };
 81 
 82 #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
 83 
 84 struct kvm_vcpu;
 85 
 86 struct nested_state {
 87         struct vmcb *hsave;
 88         u64 hsave_msr;
 89         u64 vm_cr_msr;
 90         u64 vmcb;
 91 
 92         /* These are the merged vectors */
 93         u32 *msrpm;
 94 
 95         /* gpa pointers to the real vectors */
 96         u64 vmcb_msrpm;
 97         u64 vmcb_iopm;
 98 
 99         /* A VMEXIT is required but not yet emulated */
100         bool exit_required;
101 
102         /* cache for intercepts of the guest */
103         u32 intercept_cr;
104         u32 intercept_dr;
105         u32 intercept_exceptions;
106         u64 intercept;
107 
108         /* Nested Paging related state */
109         u64 nested_cr3;
110 };
111 
112 #define MSRPM_OFFSETS   16
113 static u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
114 
115 struct vcpu_svm {
116         struct kvm_vcpu vcpu;
117         struct vmcb *vmcb;
118         unsigned long vmcb_pa;
119         struct svm_cpu_data *svm_data;
120         uint64_t asid_generation;
121         uint64_t sysenter_esp;
122         uint64_t sysenter_eip;
123 
124         u64 next_rip;
125 
126         u64 host_user_msrs[NR_HOST_SAVE_USER_MSRS];
127         struct {
128                 u16 fs;
129                 u16 gs;
130                 u16 ldt;
131                 u64 gs_base;
132         } host;
133 
134         u32 *msrpm;
135 
136         ulong nmi_iret_rip;
137 
138         struct nested_state nested;
139 
140         bool nmi_singlestep;
141 
142         unsigned int3_injected;
143         unsigned long int3_rip;
144         u32 apf_reason;
145 
146         u64  tsc_ratio;
147 };
148 
149 static DEFINE_PER_CPU(u64, current_tsc_ratio);
150 #define TSC_RATIO_DEFAULT       0x0100000000ULL
151 
152 #define MSR_INVALID                     0xffffffffU
153 
154 static struct svm_direct_access_msrs {
155         u32 index;   /* Index of the MSR */
156         bool always; /* True if intercept is always on */
157 } direct_access_msrs[] = {
158         { .index = MSR_STAR,                            .always = true  },
159         { .index = MSR_IA32_SYSENTER_CS,                .always = true  },
160 #ifdef CONFIG_X86_64
161         { .index = MSR_GS_BASE,                         .always = true  },
162         { .index = MSR_FS_BASE,                         .always = true  },
163         { .index = MSR_KERNEL_GS_BASE,                  .always = true  },
164         { .index = MSR_LSTAR,                           .always = true  },
165         { .index = MSR_CSTAR,                           .always = true  },
166         { .index = MSR_SYSCALL_MASK,                    .always = true  },
167 #endif
168         { .index = MSR_IA32_LASTBRANCHFROMIP,           .always = false },
169         { .index = MSR_IA32_LASTBRANCHTOIP,             .always = false },
170         { .index = MSR_IA32_LASTINTFROMIP,              .always = false },
171         { .index = MSR_IA32_LASTINTTOIP,                .always = false },
172         { .index = MSR_INVALID,                         .always = false },
173 };
174 
175 /* enable NPT for AMD64 and X86 with PAE */
176 #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
177 static bool npt_enabled = true;
178 #else
179 static bool npt_enabled;
180 #endif
181 static int npt = 1;
182 
183 module_param(npt, int, S_IRUGO);
184 
185 static int nested = 1;
186 module_param(nested, int, S_IRUGO);
187 
188 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
189 static void svm_complete_interrupts(struct vcpu_svm *svm);
190 
191 static int nested_svm_exit_handled(struct vcpu_svm *svm);
192 static int nested_svm_intercept(struct vcpu_svm *svm);
193 static int nested_svm_vmexit(struct vcpu_svm *svm);
194 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
195                                       bool has_error_code, u32 error_code);
196 static u64 __scale_tsc(u64 ratio, u64 tsc);
197 
198 enum {
199         VMCB_INTERCEPTS, /* Intercept vectors, TSC offset,
200                             pause filter count */
201         VMCB_PERM_MAP,   /* IOPM Base and MSRPM Base */
202         VMCB_ASID,       /* ASID */
203         VMCB_INTR,       /* int_ctl, int_vector */
204         VMCB_NPT,        /* npt_en, nCR3, gPAT */
205         VMCB_CR,         /* CR0, CR3, CR4, EFER */
206         VMCB_DR,         /* DR6, DR7 */
207         VMCB_DT,         /* GDT, IDT */
208         VMCB_SEG,        /* CS, DS, SS, ES, CPL */
209         VMCB_CR2,        /* CR2 only */
210         VMCB_LBR,        /* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
211         VMCB_DIRTY_MAX,
212 };
213 
214 /* TPR and CR2 are always written before VMRUN */
215 #define VMCB_ALWAYS_DIRTY_MASK  ((1U << VMCB_INTR) | (1U << VMCB_CR2))
216 
217 static inline void mark_all_dirty(struct vmcb *vmcb)
218 {
219         vmcb->control.clean = 0;
220 }
221 
222 static inline void mark_all_clean(struct vmcb *vmcb)
223 {
224         vmcb->control.clean = ((1 << VMCB_DIRTY_MAX) - 1)
225                                & ~VMCB_ALWAYS_DIRTY_MASK;
226 }
227 
228 static inline void mark_dirty(struct vmcb *vmcb, int bit)
229 {
230         vmcb->control.clean &= ~(1 << bit);
231 }
232 
233 static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
234 {
235         return container_of(vcpu, struct vcpu_svm, vcpu);
236 }
237 
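     /*
      * While the vcpu runs nested (L2), the active VMCB must intercept
      * everything that either KVM (the host state kept in hsave) or the L1
      * hypervisor requested, so the two intercept masks are OR'ed together.
      */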
238 static void recalc_intercepts(struct vcpu_svm *svm)
239 {
240         struct vmcb_control_area *c, *h;
241         struct nested_state *g;
242 
243         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
244 
245         if (!is_guest_mode(&svm->vcpu))
246                 return;
247 
248         c = &svm->vmcb->control;
249         h = &svm->nested.hsave->control;
250         g = &svm->nested;
251 
252         c->intercept_cr = h->intercept_cr | g->intercept_cr;
253         c->intercept_dr = h->intercept_dr | g->intercept_dr;
254         c->intercept_exceptions = h->intercept_exceptions | g->intercept_exceptions;
255         c->intercept = h->intercept | g->intercept;
256 }
257 
258 static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
259 {
260         if (is_guest_mode(&svm->vcpu))
261                 return svm->nested.hsave;
262         else
263                 return svm->vmcb;
264 }
265 
266 static inline void set_cr_intercept(struct vcpu_svm *svm, int bit)
267 {
268         struct vmcb *vmcb = get_host_vmcb(svm);
269 
270         vmcb->control.intercept_cr |= (1U << bit);
271 
272         recalc_intercepts(svm);
273 }
274 
275 static inline void clr_cr_intercept(struct vcpu_svm *svm, int bit)
276 {
277         struct vmcb *vmcb = get_host_vmcb(svm);
278 
279         vmcb->control.intercept_cr &= ~(1U << bit);
280 
281         recalc_intercepts(svm);
282 }
283 
284 static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit)
285 {
286         struct vmcb *vmcb = get_host_vmcb(svm);
287 
288         return vmcb->control.intercept_cr & (1U << bit);
289 }
290 
291 static inline void set_dr_intercept(struct vcpu_svm *svm, int bit)
292 {
293         struct vmcb *vmcb = get_host_vmcb(svm);
294 
295         vmcb->control.intercept_dr |= (1U << bit);
296 
297         recalc_intercepts(svm);
298 }
299 
300 static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit)
301 {
302         struct vmcb *vmcb = get_host_vmcb(svm);
303 
304         vmcb->control.intercept_dr &= ~(1U << bit);
305 
306         recalc_intercepts(svm);
307 }
308 
309 static inline void set_exception_intercept(struct vcpu_svm *svm, int bit)
310 {
311         struct vmcb *vmcb = get_host_vmcb(svm);
312 
313         vmcb->control.intercept_exceptions |= (1U << bit);
314 
315         recalc_intercepts(svm);
316 }
317 
318 static inline void clr_exception_intercept(struct vcpu_svm *svm, int bit)
319 {
320         struct vmcb *vmcb = get_host_vmcb(svm);
321 
322         vmcb->control.intercept_exceptions &= ~(1U << bit);
323 
324         recalc_intercepts(svm);
325 }
326 
327 static inline void set_intercept(struct vcpu_svm *svm, int bit)
328 {
329         struct vmcb *vmcb = get_host_vmcb(svm);
330 
331         vmcb->control.intercept |= (1ULL << bit);
332 
333         recalc_intercepts(svm);
334 }
335 
336 static inline void clr_intercept(struct vcpu_svm *svm, int bit)
337 {
338         struct vmcb *vmcb = get_host_vmcb(svm);
339 
340         vmcb->control.intercept &= ~(1ULL << bit);
341 
342         recalc_intercepts(svm);
343 }
344 
345 static inline void enable_gif(struct vcpu_svm *svm)
346 {
347         svm->vcpu.arch.hflags |= HF_GIF_MASK;
348 }
349 
350 static inline void disable_gif(struct vcpu_svm *svm)
351 {
352         svm->vcpu.arch.hflags &= ~HF_GIF_MASK;
353 }
354 
355 static inline bool gif_set(struct vcpu_svm *svm)
356 {
357         return !!(svm->vcpu.arch.hflags & HF_GIF_MASK);
358 }
359 
360 static unsigned long iopm_base;
361 
362 struct kvm_ldttss_desc {
363         u16 limit0;
364         u16 base0;
365         unsigned base1:8, type:5, dpl:2, p:1;
366         unsigned limit1:4, zero0:3, g:1, base2:8;
367         u32 base3;
368         u32 zero1;
369 } __attribute__((packed));
370 
371 struct svm_cpu_data {
372         int cpu;
373 
374         u64 asid_generation;
375         u32 max_asid;
376         u32 next_asid;
377         struct kvm_ldttss_desc *tss_desc;
378 
379         struct page *save_area;
380 };
381 
382 static DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
383 
384 struct svm_init_data {
385         int cpu;
386         int r;
387 };
388 
389 static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
390 
391 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
392 #define MSRS_RANGE_SIZE 2048
393 #define MSRS_IN_RANGE (MSRS_RANGE_SIZE * 8 / 2)
394 
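     /*
      * The MSR permission map covers three ranges of 8192 MSRs each (base
      * 0x0, 0xc0000000 and 0xc0010000), two bits per MSR (read and write
      * intercept), 2048 bytes per range.  Return the u32 word offset into the
      * map for a given MSR, or MSR_INVALID if the MSR is not covered.
      */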
395 static u32 svm_msrpm_offset(u32 msr)
396 {
397         u32 offset;
398         int i;
399 
400         for (i = 0; i < NUM_MSR_MAPS; i++) {
401                 if (msr < msrpm_ranges[i] ||
402                     msr >= msrpm_ranges[i] + MSRS_IN_RANGE)
403                         continue;
404 
405                 offset  = (msr - msrpm_ranges[i]) / 4; /* 4 msrs per u8 */
406                 offset += (i * MSRS_RANGE_SIZE);       /* add range offset */
407 
408                 /* Now we have the u8 offset - but need the u32 offset */
409                 return offset / 4;
410         }
411 
412         /* MSR not in any range */
413         return MSR_INVALID;
414 }
415 
416 #define MAX_INST_SIZE 15
417 
418 static inline void clgi(void)
419 {
420         asm volatile (__ex(SVM_CLGI));
421 }
422 
423 static inline void stgi(void)
424 {
425         asm volatile (__ex(SVM_STGI));
426 }
427 
428 static inline void invlpga(unsigned long addr, u32 asid)
429 {
430         asm volatile (__ex(SVM_INVLPGA) : : "a"(addr), "c"(asid));
431 }
432 
433 static int get_npt_level(void)
434 {
435 #ifdef CONFIG_X86_64
436         return PT64_ROOT_LEVEL;
437 #else
438         return PT32E_ROOT_LEVEL;
439 #endif
440 }
441 
442 static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer)
443 {
444         vcpu->arch.efer = efer;
445         if (!npt_enabled && !(efer & EFER_LMA))
446                 efer &= ~EFER_LME;
447 
448         to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME;
449         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
450 }
451 
452 static int is_external_interrupt(u32 info)
453 {
454         info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID;
455         return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR);
456 }
457 
458 static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
459 {
460         struct vcpu_svm *svm = to_svm(vcpu);
461         u32 ret = 0;
462 
463         if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)
464                 ret |= KVM_X86_SHADOW_INT_STI | KVM_X86_SHADOW_INT_MOV_SS;
465         return ret & mask;
466 }
467 
468 static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask)
469 {
470         struct vcpu_svm *svm = to_svm(vcpu);
471 
472         if (mask == 0)
473                 svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK;
474         else
475                 svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK;
476 
477 }
478 
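     /*
      * Advance the guest RIP past the instruction that caused the exit.
      * Prefer the next_rip value supplied by hardware (NRIPS); fall back to
      * the instruction emulator when it is not available, then clear any
      * pending interrupt shadow.
      */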
479 static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
480 {
481         struct vcpu_svm *svm = to_svm(vcpu);
482 
483         if (svm->vmcb->control.next_rip != 0)
484                 svm->next_rip = svm->vmcb->control.next_rip;
485 
486         if (!svm->next_rip) {
487                 if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
488                                 EMULATE_DONE)
489                         printk(KERN_DEBUG "%s: NOP\n", __func__);
490                 return;
491         }
492         if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE)
493                 printk(KERN_ERR "%s: ip 0x%lx next 0x%llx\n",
494                        __func__, kvm_rip_read(vcpu), svm->next_rip);
495 
496         kvm_rip_write(vcpu, svm->next_rip);
497         svm_set_interrupt_shadow(vcpu, 0);
498 }
499 
500 static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
501                                 bool has_error_code, u32 error_code,
502                                 bool reinject)
503 {
504         struct vcpu_svm *svm = to_svm(vcpu);
505 
506         /*
507          * If we are within a nested VM we'd better #VMEXIT and let the guest
508          * handle the exception
509          */
510         if (!reinject &&
511             nested_svm_check_exception(svm, nr, has_error_code, error_code))
512                 return;
513 
514         if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
515                 unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
516 
517                 /*
518                  * For guest debugging where we have to reinject #BP if some
519                  * INT3 is guest-owned:
520                  * Emulate nRIP by moving RIP forward. Will fail if injection
521                  * raises a fault that is not intercepted. Still better than
522                  * failing in all cases.
523                  */
524                 skip_emulated_instruction(&svm->vcpu);
525                 rip = kvm_rip_read(&svm->vcpu);
526                 svm->int3_rip = rip + svm->vmcb->save.cs.base;
527                 svm->int3_injected = rip - old_rip;
528         }
529 
530         svm->vmcb->control.event_inj = nr
531                 | SVM_EVTINJ_VALID
532                 | (has_error_code ? SVM_EVTINJ_VALID_ERR : 0)
533                 | SVM_EVTINJ_TYPE_EXEPT;
534         svm->vmcb->control.event_inj_err = error_code;
535 }
536 
537 static void svm_init_erratum_383(void)
538 {
539         u32 low, high;
540         int err;
541         u64 val;
542 
543         if (!cpu_has_amd_erratum(amd_erratum_383))
544                 return;
545 
546         /* Use _safe variants to not break nested virtualization */
547         val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
548         if (err)
549                 return;
550 
551         val |= (1ULL << 47);
552 
553         low  = lower_32_bits(val);
554         high = upper_32_bits(val);
555 
556         native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
557 
558         erratum_383_found = true;
559 }
560 
561 static int has_svm(void)
562 {
563         const char *msg;
564 
565         if (!cpu_has_svm(&msg)) {
566                 printk(KERN_INFO "has_svm: %s\n", msg);
567                 return 0;
568         }
569 
570         return 1;
571 }
572 
573 static void svm_hardware_disable(void *garbage)
574 {
575         /* Make sure we clean up behind us */
576         if (static_cpu_has(X86_FEATURE_TSCRATEMSR))
577                 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
578 
579         cpu_svm_disable();
580 
581         amd_pmu_disable_virt();
582 }
583 
584 static int svm_hardware_enable(void *garbage)
585 {
586 
587         struct svm_cpu_data *sd;
588         uint64_t efer;
589         struct desc_ptr gdt_descr;
590         struct desc_struct *gdt;
591         int me = raw_smp_processor_id();
592 
593         rdmsrl(MSR_EFER, efer);
594         if (efer & EFER_SVME)
595                 return -EBUSY;
596 
597         if (!has_svm()) {
598                 printk(KERN_ERR "svm_hardware_enable: err EOPNOTSUPP on %d\n",
599                        me);
600                 return -EINVAL;
601         }
602         sd = per_cpu(svm_data, me);
603 
604         if (!sd) {
605                 printk(KERN_ERR "svm_hardware_enable: svm_data is NULL on %d\n",
606                        me);
607                 return -EINVAL;
608         }
609 
610         sd->asid_generation = 1;
611         sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
612         sd->next_asid = sd->max_asid + 1;
613 
614         native_store_gdt(&gdt_descr);
615         gdt = (struct desc_struct *)gdt_descr.address;
616         sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
617 
618         wrmsrl(MSR_EFER, efer | EFER_SVME);
619 
620         wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
621 
622         if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) {
623                 wrmsrl(MSR_AMD64_TSC_RATIO, TSC_RATIO_DEFAULT);
624                 __get_cpu_var(current_tsc_ratio) = TSC_RATIO_DEFAULT;
625         }
626 
627         svm_init_erratum_383();
628 
629         amd_pmu_enable_virt();
630 
631         return 0;
632 }
633 
634 static void svm_cpu_uninit(int cpu)
635 {
636         struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
637 
638         if (!sd)
639                 return;
640 
641         per_cpu(svm_data, raw_smp_processor_id()) = NULL;
642         __free_page(sd->save_area);
643         kfree(sd);
644 }
645 
646 static int svm_cpu_init(int cpu)
647 {
648         struct svm_cpu_data *sd;
649         int r;
650 
651         sd = kzalloc(sizeof(struct svm_cpu_data), GFP_KERNEL);
652         if (!sd)
653                 return -ENOMEM;
654         sd->cpu = cpu;
655         sd->save_area = alloc_page(GFP_KERNEL);
656         r = -ENOMEM;
657         if (!sd->save_area)
658                 goto err_1;
659 
660         per_cpu(svm_data, cpu) = sd;
661 
662         return 0;
663 
664 err_1:
665         kfree(sd);
666         return r;
667 
668 }
669 
670 static bool valid_msr_intercept(u32 index)
671 {
672         int i;
673 
674         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++)
675                 if (direct_access_msrs[i].index == index)
676                         return true;
677 
678         return false;
679 }
680 
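     /*
      * Each MSR owns two consecutive bits in the permission map: the even bit
      * intercepts reads, the odd one writes.  Passing read/write == 1 clears
      * the corresponding bit, i.e. lets the guest access the MSR directly.
      */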
681 static void set_msr_interception(u32 *msrpm, unsigned msr,
682                                  int read, int write)
683 {
684         u8 bit_read, bit_write;
685         unsigned long tmp;
686         u32 offset;
687 
688         /*
 689          * If this warning triggers, extend the direct_access_msrs list at the
 690          * beginning of the file.
691          */
692         WARN_ON(!valid_msr_intercept(msr));
693 
694         offset    = svm_msrpm_offset(msr);
695         bit_read  = 2 * (msr & 0x0f);
696         bit_write = 2 * (msr & 0x0f) + 1;
697         tmp       = msrpm[offset];
698 
699         BUG_ON(offset == MSR_INVALID);
700 
701         read  ? clear_bit(bit_read,  &tmp) : set_bit(bit_read,  &tmp);
702         write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp);
703 
704         msrpm[offset] = tmp;
705 }
706 
707 static void svm_vcpu_init_msrpm(u32 *msrpm)
708 {
709         int i;
710 
711         memset(msrpm, 0xff, PAGE_SIZE * (1 << MSRPM_ALLOC_ORDER));
712 
713         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
714                 if (!direct_access_msrs[i].always)
715                         continue;
716 
717                 set_msr_interception(msrpm, direct_access_msrs[i].index, 1, 1);
718         }
719 }
720 
721 static void add_msr_offset(u32 offset)
722 {
723         int i;
724 
725         for (i = 0; i < MSRPM_OFFSETS; ++i) {
726 
727                 /* Offset already in list? */
728                 if (msrpm_offsets[i] == offset)
729                         return;
730 
731                 /* Slot used by another offset? */
732                 if (msrpm_offsets[i] != MSR_INVALID)
733                         continue;
734 
735                 /* Add offset to list */
736                 msrpm_offsets[i] = offset;
737 
738                 return;
739         }
740 
741         /*
 742          * If this BUG triggers, the msrpm_offsets table has overflowed. Just
743          * increase MSRPM_OFFSETS in this case.
744          */
745         BUG();
746 }
747 
748 static void init_msrpm_offsets(void)
749 {
750         int i;
751 
752         memset(msrpm_offsets, 0xff, sizeof(msrpm_offsets));
753 
754         for (i = 0; direct_access_msrs[i].index != MSR_INVALID; i++) {
755                 u32 offset;
756 
757                 offset = svm_msrpm_offset(direct_access_msrs[i].index);
758                 BUG_ON(offset == MSR_INVALID);
759 
760                 add_msr_offset(offset);
761         }
762 }
763 
764 static void svm_enable_lbrv(struct vcpu_svm *svm)
765 {
766         u32 *msrpm = svm->msrpm;
767 
768         svm->vmcb->control.lbr_ctl = 1;
769         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
770         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
771         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
772         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
773 }
774 
775 static void svm_disable_lbrv(struct vcpu_svm *svm)
776 {
777         u32 *msrpm = svm->msrpm;
778 
779         svm->vmcb->control.lbr_ctl = 0;
780         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
781         set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
782         set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
783         set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
784 }
785 
786 static __init int svm_hardware_setup(void)
787 {
788         int cpu;
789         struct page *iopm_pages;
790         void *iopm_va;
791         int r;
792 
793         iopm_pages = alloc_pages(GFP_KERNEL, IOPM_ALLOC_ORDER);
794 
795         if (!iopm_pages)
796                 return -ENOMEM;
797 
798         iopm_va = page_address(iopm_pages);
799         memset(iopm_va, 0xff, PAGE_SIZE * (1 << IOPM_ALLOC_ORDER));
800         iopm_base = page_to_pfn(iopm_pages) << PAGE_SHIFT;
801 
802         init_msrpm_offsets();
803 
804         if (boot_cpu_has(X86_FEATURE_NX))
805                 kvm_enable_efer_bits(EFER_NX);
806 
807         if (boot_cpu_has(X86_FEATURE_FXSR_OPT))
808                 kvm_enable_efer_bits(EFER_FFXSR);
809 
810         if (boot_cpu_has(X86_FEATURE_TSCRATEMSR)) {
811                 u64 max;
812 
813                 kvm_has_tsc_control = true;
814 
815                 /*
816                  * Make sure the user can only configure tsc_khz values that
817                  * fit into a signed integer.
 818                  * A min value need not be calculated because it will always
 819                  * be 1 on all machines, and a value of 0 is used to disable
820                  * tsc-scaling for the vcpu.
821                  */
822                 max = min(0x7fffffffULL, __scale_tsc(tsc_khz, TSC_RATIO_MAX));
823 
824                 kvm_max_guest_tsc_khz = max;
825         }
826 
827         if (nested) {
828                 printk(KERN_INFO "kvm: Nested Virtualization enabled\n");
829                 kvm_enable_efer_bits(EFER_SVME | EFER_LMSLE);
830         }
831 
832         for_each_possible_cpu(cpu) {
833                 r = svm_cpu_init(cpu);
834                 if (r)
835                         goto err;
836         }
837 
838         if (!boot_cpu_has(X86_FEATURE_NPT))
839                 npt_enabled = false;
840 
841         if (npt_enabled && !npt) {
842                 printk(KERN_INFO "kvm: Nested Paging disabled\n");
843                 npt_enabled = false;
844         }
845 
846         if (npt_enabled) {
847                 printk(KERN_INFO "kvm: Nested Paging enabled\n");
848                 kvm_enable_tdp();
849         } else
850                 kvm_disable_tdp();
851 
852         return 0;
853 
854 err:
855         __free_pages(iopm_pages, IOPM_ALLOC_ORDER);
856         iopm_base = 0;
857         return r;
858 }
859 
860 static __exit void svm_hardware_unsetup(void)
861 {
862         int cpu;
863 
864         for_each_possible_cpu(cpu)
865                 svm_cpu_uninit(cpu);
866 
867         __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER);
868         iopm_base = 0;
869 }
870 
871 static void init_seg(struct vmcb_seg *seg)
872 {
873         seg->selector = 0;
874         seg->attrib = SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK |
875                       SVM_SELECTOR_WRITE_MASK; /* Read/Write Data Segment */
876         seg->limit = 0xffff;
877         seg->base = 0;
878 }
879 
880 static void init_sys_seg(struct vmcb_seg *seg, uint32_t type)
881 {
882         seg->selector = 0;
883         seg->attrib = SVM_SELECTOR_P_MASK | type;
884         seg->limit = 0xffff;
885         seg->base = 0;
886 }
887 
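     /*
      * The TSC ratio is a fixed-point value with 32 fractional bits (the
      * integer part sits in the upper bits; TSC_RATIO_DEFAULT is 1.0).
      * Multiply a TSC value by the ratio without losing the high bits of the
      * 64x64 product by handling the integer and fractional parts separately.
      */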
888 static u64 __scale_tsc(u64 ratio, u64 tsc)
889 {
890         u64 mult, frac, _tsc;
891 
892         mult  = ratio >> 32;
893         frac  = ratio & ((1ULL << 32) - 1);
894 
895         _tsc  = tsc;
896         _tsc *= mult;
897         _tsc += (tsc >> 32) * frac;
898         _tsc += ((tsc & ((1ULL << 32) - 1)) * frac) >> 32;
899 
900         return _tsc;
901 }
902 
903 static u64 svm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc)
904 {
905         struct vcpu_svm *svm = to_svm(vcpu);
906         u64 _tsc = tsc;
907 
908         if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
909                 _tsc = __scale_tsc(svm->tsc_ratio, tsc);
910 
911         return _tsc;
912 }
913 
914 static void svm_set_tsc_khz(struct kvm_vcpu *vcpu, u32 user_tsc_khz)
915 {
916         struct vcpu_svm *svm = to_svm(vcpu);
917         u64 ratio;
918         u64 khz;
919 
920         /* TSC scaling supported? */
921         if (!boot_cpu_has(X86_FEATURE_TSCRATEMSR))
922                 return;
923 
924         /* TSC-Scaling disabled or guest TSC same frequency as host TSC? */
925         if (user_tsc_khz == 0) {
926                 vcpu->arch.virtual_tsc_khz = 0;
927                 svm->tsc_ratio = TSC_RATIO_DEFAULT;
928                 return;
929         }
930 
931         khz = user_tsc_khz;
932 
933         /* TSC scaling required  - calculate ratio */
934         ratio = khz << 32;
935         do_div(ratio, tsc_khz);
936 
937         if (ratio == 0 || ratio & TSC_RATIO_RSVD) {
938                 WARN_ONCE(1, "Invalid TSC ratio - virtual-tsc-khz=%u\n",
939                                 user_tsc_khz);
940                 return;
941         }
942         vcpu->arch.virtual_tsc_khz = user_tsc_khz;
943         svm->tsc_ratio             = ratio;
944 }
945 
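     /*
      * While L2 is running the active VMCB carries the combined TSC offset
      * (KVM's offset for L1 plus L1's offset for L2).  hsave keeps L1's own
      * offset, so the L2 delta is preserved when the offset is rewritten.
      */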
946 static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
947 {
948         struct vcpu_svm *svm = to_svm(vcpu);
949         u64 g_tsc_offset = 0;
950 
951         if (is_guest_mode(vcpu)) {
952                 g_tsc_offset = svm->vmcb->control.tsc_offset -
953                                svm->nested.hsave->control.tsc_offset;
954                 svm->nested.hsave->control.tsc_offset = offset;
955         }
956 
957         svm->vmcb->control.tsc_offset = offset + g_tsc_offset;
958 
959         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
960 }
961 
962 static void svm_adjust_tsc_offset(struct kvm_vcpu *vcpu, s64 adjustment)
963 {
964         struct vcpu_svm *svm = to_svm(vcpu);
965 
966         svm->vmcb->control.tsc_offset += adjustment;
967         if (is_guest_mode(vcpu))
968                 svm->nested.hsave->control.tsc_offset += adjustment;
969         mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
970 }
971 
972 static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
973 {
974         u64 tsc;
975 
976         tsc = svm_scale_tsc(vcpu, native_read_tsc());
977 
978         return target_tsc - tsc;
979 }
980 
981 static void init_vmcb(struct vcpu_svm *svm)
982 {
983         struct vmcb_control_area *control = &svm->vmcb->control;
984         struct vmcb_save_area *save = &svm->vmcb->save;
985 
986         svm->vcpu.fpu_active = 1;
987         svm->vcpu.arch.hflags = 0;
988 
989         set_cr_intercept(svm, INTERCEPT_CR0_READ);
990         set_cr_intercept(svm, INTERCEPT_CR3_READ);
991         set_cr_intercept(svm, INTERCEPT_CR4_READ);
992         set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
993         set_cr_intercept(svm, INTERCEPT_CR3_WRITE);
994         set_cr_intercept(svm, INTERCEPT_CR4_WRITE);
995         set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
996 
997         set_dr_intercept(svm, INTERCEPT_DR0_READ);
998         set_dr_intercept(svm, INTERCEPT_DR1_READ);
999         set_dr_intercept(svm, INTERCEPT_DR2_READ);
1000         set_dr_intercept(svm, INTERCEPT_DR3_READ);
1001         set_dr_intercept(svm, INTERCEPT_DR4_READ);
1002         set_dr_intercept(svm, INTERCEPT_DR5_READ);
1003         set_dr_intercept(svm, INTERCEPT_DR6_READ);
1004         set_dr_intercept(svm, INTERCEPT_DR7_READ);
1005 
1006         set_dr_intercept(svm, INTERCEPT_DR0_WRITE);
1007         set_dr_intercept(svm, INTERCEPT_DR1_WRITE);
1008         set_dr_intercept(svm, INTERCEPT_DR2_WRITE);
1009         set_dr_intercept(svm, INTERCEPT_DR3_WRITE);
1010         set_dr_intercept(svm, INTERCEPT_DR4_WRITE);
1011         set_dr_intercept(svm, INTERCEPT_DR5_WRITE);
1012         set_dr_intercept(svm, INTERCEPT_DR6_WRITE);
1013         set_dr_intercept(svm, INTERCEPT_DR7_WRITE);
1014 
1015         set_exception_intercept(svm, PF_VECTOR);
1016         set_exception_intercept(svm, UD_VECTOR);
1017         set_exception_intercept(svm, MC_VECTOR);
1018         set_exception_intercept(svm, AC_VECTOR);
1019         set_exception_intercept(svm, DB_VECTOR);
1020 
1021         set_intercept(svm, INTERCEPT_INTR);
1022         set_intercept(svm, INTERCEPT_NMI);
1023         set_intercept(svm, INTERCEPT_SMI);
1024         set_intercept(svm, INTERCEPT_SELECTIVE_CR0);
1025         set_intercept(svm, INTERCEPT_CPUID);
1026         set_intercept(svm, INTERCEPT_INVD);
1027         set_intercept(svm, INTERCEPT_HLT);
1028         set_intercept(svm, INTERCEPT_INVLPG);
1029         set_intercept(svm, INTERCEPT_INVLPGA);
1030         set_intercept(svm, INTERCEPT_IOIO_PROT);
1031         set_intercept(svm, INTERCEPT_MSR_PROT);
1032         set_intercept(svm, INTERCEPT_TASK_SWITCH);
1033         set_intercept(svm, INTERCEPT_SHUTDOWN);
1034         set_intercept(svm, INTERCEPT_VMRUN);
1035         set_intercept(svm, INTERCEPT_VMMCALL);
1036         set_intercept(svm, INTERCEPT_VMLOAD);
1037         set_intercept(svm, INTERCEPT_VMSAVE);
1038         set_intercept(svm, INTERCEPT_STGI);
1039         set_intercept(svm, INTERCEPT_CLGI);
1040         set_intercept(svm, INTERCEPT_SKINIT);
1041         set_intercept(svm, INTERCEPT_WBINVD);
1042         set_intercept(svm, INTERCEPT_MONITOR);
1043         set_intercept(svm, INTERCEPT_MWAIT);
1044         set_intercept(svm, INTERCEPT_XSETBV);
1045 
1046         control->iopm_base_pa = iopm_base;
1047         control->msrpm_base_pa = __pa(svm->msrpm);
1048         control->int_ctl = V_INTR_MASKING_MASK;
1049 
1050         init_seg(&save->es);
1051         init_seg(&save->ss);
1052         init_seg(&save->ds);
1053         init_seg(&save->fs);
1054         init_seg(&save->gs);
1055 
1056         save->cs.selector = 0xf000;
1057         /* Executable/Readable Code Segment */
1058         save->cs.attrib = SVM_SELECTOR_READ_MASK | SVM_SELECTOR_P_MASK |
1059                 SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
1060         save->cs.limit = 0xffff;
1061         /*
1062          * cs.base should really be 0xffff0000, but vmx can't handle that, so
1063          * be consistent with it.
1064          *
1065          * Replace when we have real mode working for vmx.
1066          */
1067         save->cs.base = 0xf0000;
1068 
1069         save->gdtr.limit = 0xffff;
1070         save->idtr.limit = 0xffff;
1071 
1072         init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
1073         init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
1074 
1075         svm_set_efer(&svm->vcpu, 0);
1076         save->dr6 = 0xffff0ff0;
1077         save->dr7 = 0x400;
1078         kvm_set_rflags(&svm->vcpu, 2);
1079         save->rip = 0x0000fff0;
1080         svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
1081 
1082         /*
1083          * This is the guest-visible cr0 value.
1084          * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
1085          */
1086         svm->vcpu.arch.cr0 = 0;
1087         (void)kvm_set_cr0(&svm->vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
1088 
1089         save->cr4 = X86_CR4_PAE;
1090         /* rdx = ?? */
1091 
1092         if (npt_enabled) {
1093                 /* Setup VMCB for Nested Paging */
1094                 control->nested_ctl = 1;
1095                 clr_intercept(svm, INTERCEPT_INVLPG);
1096                 clr_exception_intercept(svm, PF_VECTOR);
1097                 clr_cr_intercept(svm, INTERCEPT_CR3_READ);
1098                 clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
1099                 save->g_pat = 0x0007040600070406ULL;
1100                 save->cr3 = 0;
1101                 save->cr4 = 0;
1102         }
1103         svm->asid_generation = 0;
1104 
1105         svm->nested.vmcb = 0;
1106         svm->vcpu.arch.hflags = 0;
1107 
1108         if (boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
1109                 control->pause_filter_count = 3000;
1110                 set_intercept(svm, INTERCEPT_PAUSE);
1111         }
1112 
1113         mark_all_dirty(svm->vmcb);
1114 
1115         enable_gif(svm);
1116 }
1117 
1118 static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
1119 {
1120         struct vcpu_svm *svm = to_svm(vcpu);
1121 
1122         init_vmcb(svm);
1123 
1124         if (!kvm_vcpu_is_bsp(vcpu)) {
1125                 kvm_rip_write(vcpu, 0);
1126                 svm->vmcb->save.cs.base = svm->vcpu.arch.sipi_vector << 12;
1127                 svm->vmcb->save.cs.selector = svm->vcpu.arch.sipi_vector << 8;
1128         }
1129         vcpu->arch.regs_avail = ~0;
1130         vcpu->arch.regs_dirty = ~0;
1131 
1132         return 0;
1133 }
1134 
1135 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
1136 {
1137         struct vcpu_svm *svm;
1138         struct page *page;
1139         struct page *msrpm_pages;
1140         struct page *hsave_page;
1141         struct page *nested_msrpm_pages;
1142         int err;
1143 
1144         svm = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
1145         if (!svm) {
1146                 err = -ENOMEM;
1147                 goto out;
1148         }
1149 
1150         svm->tsc_ratio = TSC_RATIO_DEFAULT;
1151 
1152         err = kvm_vcpu_init(&svm->vcpu, kvm, id);
1153         if (err)
1154                 goto free_svm;
1155 
1156         err = -ENOMEM;
1157         page = alloc_page(GFP_KERNEL);
1158         if (!page)
1159                 goto uninit;
1160 
1161         msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1162         if (!msrpm_pages)
1163                 goto free_page1;
1164 
1165         nested_msrpm_pages = alloc_pages(GFP_KERNEL, MSRPM_ALLOC_ORDER);
1166         if (!nested_msrpm_pages)
1167                 goto free_page2;
1168 
1169         hsave_page = alloc_page(GFP_KERNEL);
1170         if (!hsave_page)
1171                 goto free_page3;
1172 
1173         svm->nested.hsave = page_address(hsave_page);
1174 
1175         svm->msrpm = page_address(msrpm_pages);
1176         svm_vcpu_init_msrpm(svm->msrpm);
1177 
1178         svm->nested.msrpm = page_address(nested_msrpm_pages);
1179         svm_vcpu_init_msrpm(svm->nested.msrpm);
1180 
1181         svm->vmcb = page_address(page);
1182         clear_page(svm->vmcb);
1183         svm->vmcb_pa = page_to_pfn(page) << PAGE_SHIFT;
1184         svm->asid_generation = 0;
1185         init_vmcb(svm);
1186         kvm_write_tsc(&svm->vcpu, 0);
1187 
1188         err = fx_init(&svm->vcpu);
1189         if (err)
1190                 goto free_page4;
1191 
1192         svm->vcpu.arch.apic_base = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
1193         if (kvm_vcpu_is_bsp(&svm->vcpu))
1194                 svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
1195 
1196         return &svm->vcpu;
1197 
1198 free_page4:
1199         __free_page(hsave_page);
1200 free_page3:
1201         __free_pages(nested_msrpm_pages, MSRPM_ALLOC_ORDER);
1202 free_page2:
1203         __free_pages(msrpm_pages, MSRPM_ALLOC_ORDER);
1204 free_page1:
1205         __free_page(page);
1206 uninit:
1207         kvm_vcpu_uninit(&svm->vcpu);
1208 free_svm:
1209         kmem_cache_free(kvm_vcpu_cache, svm);
1210 out:
1211         return ERR_PTR(err);
1212 }
1213 
1214 static void svm_free_vcpu(struct kvm_vcpu *vcpu)
1215 {
1216         struct vcpu_svm *svm = to_svm(vcpu);
1217 
1218         __free_page(pfn_to_page(svm->vmcb_pa >> PAGE_SHIFT));
1219         __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER);
1220         __free_page(virt_to_page(svm->nested.hsave));
1221         __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER);
1222         kvm_vcpu_uninit(vcpu);
1223         kmem_cache_free(kvm_vcpu_cache, svm);
1224 }
1225 
1226 static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
1227 {
1228         struct vcpu_svm *svm = to_svm(vcpu);
1229         int i;
1230 
1231         if (unlikely(cpu != vcpu->cpu)) {
1232                 svm->asid_generation = 0;
1233                 mark_all_dirty(svm->vmcb);
1234         }
1235 
1236 #ifdef CONFIG_X86_64
1237         rdmsrl(MSR_GS_BASE, to_svm(vcpu)->host.gs_base);
1238 #endif
1239         savesegment(fs, svm->host.fs);
1240         savesegment(gs, svm->host.gs);
1241         svm->host.ldt = kvm_read_ldt();
1242 
1243         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1244                 rdmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1245 
1246         if (static_cpu_has(X86_FEATURE_TSCRATEMSR) &&
1247             svm->tsc_ratio != __get_cpu_var(current_tsc_ratio)) {
1248                 __get_cpu_var(current_tsc_ratio) = svm->tsc_ratio;
1249                 wrmsrl(MSR_AMD64_TSC_RATIO, svm->tsc_ratio);
1250         }
1251 }
1252 
1253 static void svm_vcpu_put(struct kvm_vcpu *vcpu)
1254 {
1255         struct vcpu_svm *svm = to_svm(vcpu);
1256         int i;
1257 
1258         ++vcpu->stat.host_state_reload;
1259         kvm_load_ldt(svm->host.ldt);
1260 #ifdef CONFIG_X86_64
1261         loadsegment(fs, svm->host.fs);
1262         wrmsrl(MSR_KERNEL_GS_BASE, current->thread.gs);
1263         load_gs_index(svm->host.gs);
1264 #else
1265 #ifdef CONFIG_X86_32_LAZY_GS
1266         loadsegment(gs, svm->host.gs);
1267 #endif
1268 #endif
1269         for (i = 0; i < NR_HOST_SAVE_USER_MSRS; i++)
1270                 wrmsrl(host_save_user_msrs[i], svm->host_user_msrs[i]);
1271 }
1272 
1273 static unsigned long svm_get_rflags(struct kvm_vcpu *vcpu)
1274 {
1275         return to_svm(vcpu)->vmcb->save.rflags;
1276 }
1277 
1278 static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
1279 {
1280         to_svm(vcpu)->vmcb->save.rflags = rflags;
1281 }
1282 
1283 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
1284 {
1285         switch (reg) {
1286         case VCPU_EXREG_PDPTR:
1287                 BUG_ON(!npt_enabled);
1288                 load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
1289                 break;
1290         default:
1291                 BUG();
1292         }
1293 }
1294 
1295 static void svm_set_vintr(struct vcpu_svm *svm)
1296 {
1297         set_intercept(svm, INTERCEPT_VINTR);
1298 }
1299 
1300 static void svm_clear_vintr(struct vcpu_svm *svm)
1301 {
1302         clr_intercept(svm, INTERCEPT_VINTR);
1303 }
1304 
1305 static struct vmcb_seg *svm_seg(struct kvm_vcpu *vcpu, int seg)
1306 {
1307         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1308 
1309         switch (seg) {
1310         case VCPU_SREG_CS: return &save->cs;
1311         case VCPU_SREG_DS: return &save->ds;
1312         case VCPU_SREG_ES: return &save->es;
1313         case VCPU_SREG_FS: return &save->fs;
1314         case VCPU_SREG_GS: return &save->gs;
1315         case VCPU_SREG_SS: return &save->ss;
1316         case VCPU_SREG_TR: return &save->tr;
1317         case VCPU_SREG_LDTR: return &save->ldtr;
1318         }
1319         BUG();
1320         return NULL;
1321 }
1322 
1323 static u64 svm_get_segment_base(struct kvm_vcpu *vcpu, int seg)
1324 {
1325         struct vmcb_seg *s = svm_seg(vcpu, seg);
1326 
1327         return s->base;
1328 }
1329 
1330 static void svm_get_segment(struct kvm_vcpu *vcpu,
1331                             struct kvm_segment *var, int seg)
1332 {
1333         struct vmcb_seg *s = svm_seg(vcpu, seg);
1334 
1335         var->base = s->base;
1336         var->limit = s->limit;
1337         var->selector = s->selector;
1338         var->type = s->attrib & SVM_SELECTOR_TYPE_MASK;
1339         var->s = (s->attrib >> SVM_SELECTOR_S_SHIFT) & 1;
1340         var->dpl = (s->attrib >> SVM_SELECTOR_DPL_SHIFT) & 3;
1341         var->present = (s->attrib >> SVM_SELECTOR_P_SHIFT) & 1;
1342         var->avl = (s->attrib >> SVM_SELECTOR_AVL_SHIFT) & 1;
1343         var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1;
1344         var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1;
1345         var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1;
1346 
1347         /*
1348          * AMD's VMCB does not have an explicit unusable field, so emulate it
1349          * for cross-vendor migration: a not-present or null-type segment is unusable.
1350          */
1351         var->unusable = !var->present || (var->type == 0);
1352 
1353         switch (seg) {
1354         case VCPU_SREG_CS:
1355                 /*
1356                  * SVM always stores 0 for the 'G' bit in the CS selector in
1357                  * the VMCB on a VMEXIT. This hurts cross-vendor migration:
1358                  * Intel's VMENTRY has a check on the 'G' bit.
1359                  */
1360                 var->g = s->limit > 0xfffff;
1361                 break;
1362         case VCPU_SREG_TR:
1363                 /*
1364                  * Work around a bug where the busy flag in the tr selector
1365                  * isn't exposed
1366                  */
1367                 var->type |= 0x2;
1368                 break;
1369         case VCPU_SREG_DS:
1370         case VCPU_SREG_ES:
1371         case VCPU_SREG_FS:
1372         case VCPU_SREG_GS:
1373                 /*
1374                  * The accessed bit must always be set in the segment
1375                  * descriptor cache: although it can be cleared in the
1376                  * descriptor, the cached bit always remains at 1. Since
1377                  * Intel has a check on this, set it here to support
1378                  * cross-vendor migration.
1379                  */
1380                 if (!var->unusable)
1381                         var->type |= 0x1;
1382                 break;
1383         case VCPU_SREG_SS:
1384                 /*
1385                  * On AMD CPUs sometimes the DB bit in the segment
1386                  * descriptor is left as 1, although the whole segment has
1387                  * been made unusable. Clear it here to pass an Intel VMX
1388                  * entry check when cross vendor migrating.
1389                  */
1390                 if (var->unusable)
1391                         var->db = 0;
1392                 break;
1393         }
1394 }
1395 
1396 static int svm_get_cpl(struct kvm_vcpu *vcpu)
1397 {
1398         struct vmcb_save_area *save = &to_svm(vcpu)->vmcb->save;
1399 
1400         return save->cpl;
1401 }
1402 
1403 static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1404 {
1405         struct vcpu_svm *svm = to_svm(vcpu);
1406 
1407         dt->size = svm->vmcb->save.idtr.limit;
1408         dt->address = svm->vmcb->save.idtr.base;
1409 }
1410 
1411 static void svm_set_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1412 {
1413         struct vcpu_svm *svm = to_svm(vcpu);
1414 
1415         svm->vmcb->save.idtr.limit = dt->size;
1416         svm->vmcb->save.idtr.base = dt->address;
1417         mark_dirty(svm->vmcb, VMCB_DT);
1418 }
1419 
1420 static void svm_get_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1421 {
1422         struct vcpu_svm *svm = to_svm(vcpu);
1423 
1424         dt->size = svm->vmcb->save.gdtr.limit;
1425         dt->address = svm->vmcb->save.gdtr.base;
1426 }
1427 
1428 static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
1429 {
1430         struct vcpu_svm *svm = to_svm(vcpu);
1431 
1432         svm->vmcb->save.gdtr.limit = dt->size;
1433         svm->vmcb->save.gdtr.base = dt->address;
1434         mark_dirty(svm->vmcb, VMCB_DT);
1435 }
1436 
1437 static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
1438 {
1439 }
1440 
1441 static void svm_decache_cr3(struct kvm_vcpu *vcpu)
1442 {
1443 }
1444 
1445 static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
1446 {
1447 }
1448 
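     /*
      * Intercept CR0 accesses only while the guest-visible CR0 differs from
      * the value the hardware runs with (or the FPU is inactive); once the
      * two views match, reads and writes can go straight to hardware.
      */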
1449 static void update_cr0_intercept(struct vcpu_svm *svm)
1450 {
1451         ulong gcr0 = svm->vcpu.arch.cr0;
1452         u64 *hcr0 = &svm->vmcb->save.cr0;
1453 
1454         if (!svm->vcpu.fpu_active)
1455                 *hcr0 |= SVM_CR0_SELECTIVE_MASK;
1456         else
1457                 *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
1458                         | (gcr0 & SVM_CR0_SELECTIVE_MASK);
1459 
1460         mark_dirty(svm->vmcb, VMCB_CR);
1461 
1462         if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
1463                 clr_cr_intercept(svm, INTERCEPT_CR0_READ);
1464                 clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1465         } else {
1466                 set_cr_intercept(svm, INTERCEPT_CR0_READ);
1467                 set_cr_intercept(svm, INTERCEPT_CR0_WRITE);
1468         }
1469 }
1470 
1471 static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
1472 {
1473         struct vcpu_svm *svm = to_svm(vcpu);
1474 
1475 #ifdef CONFIG_X86_64
1476         if (vcpu->arch.efer & EFER_LME) {
1477                 if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
1478                         vcpu->arch.efer |= EFER_LMA;
1479                         svm->vmcb->save.efer |= EFER_LMA | EFER_LME;
1480                 }
1481 
1482                 if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) {
1483                         vcpu->arch.efer &= ~EFER_LMA;
1484                         svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME);
1485                 }
1486         }
1487 #endif
1488         vcpu->arch.cr0 = cr0;
1489 
1490         if (!npt_enabled)
1491                 cr0 |= X86_CR0_PG | X86_CR0_WP;
1492 
1493         if (!vcpu->fpu_active)
1494                 cr0 |= X86_CR0_TS;
1495         /*
1496          * re-enable caching here because the QEMU bios
1497          * does not do it - this results in some delay at
1498          * reboot
1499          */
1500         cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
1501         svm->vmcb->save.cr0 = cr0;
1502         mark_dirty(svm->vmcb, VMCB_CR);
1503         update_cr0_intercept(svm);
1504 }
1505 
1506 static int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
1507 {
1508         unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE;
1509         unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4;
1510 
1511         if (cr4 & X86_CR4_VMXE)
1512                 return 1;
1513 
1514         if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE))
1515                 svm_flush_tlb(vcpu);
1516 
1517         vcpu->arch.cr4 = cr4;
1518         if (!npt_enabled)
1519                 cr4 |= X86_CR4_PAE;
1520         cr4 |= host_cr4_mce;
1521         to_svm(vcpu)->vmcb->save.cr4 = cr4;
1522         mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR);
1523         return 0;
1524 }
1525 
1526 static void svm_set_segment(struct kvm_vcpu *vcpu,
1527                             struct kvm_segment *var, int seg)
1528 {
1529         struct vcpu_svm *svm = to_svm(vcpu);
1530         struct vmcb_seg *s = svm_seg(vcpu, seg);
1531 
1532         s->base = var->base;
1533         s->limit = var->limit;
1534         s->selector = var->selector;
1535         if (var->unusable)
1536                 s->attrib = 0;
1537         else {
1538                 s->attrib = (var->type & SVM_SELECTOR_TYPE_MASK);
1539                 s->attrib |= (var->s & 1) << SVM_SELECTOR_S_SHIFT;
1540                 s->attrib |= (var->dpl & 3) << SVM_SELECTOR_DPL_SHIFT;
1541                 s->attrib |= (var->present & 1) << SVM_SELECTOR_P_SHIFT;
1542                 s->attrib |= (var->avl & 1) << SVM_SELECTOR_AVL_SHIFT;
1543                 s->attrib |= (var->l & 1) << SVM_SELECTOR_L_SHIFT;
1544                 s->attrib |= (var->db & 1) << SVM_SELECTOR_DB_SHIFT;
1545                 s->attrib |= (var->g & 1) << SVM_SELECTOR_G_SHIFT;
1546         }
1547         if (seg == VCPU_SREG_CS)
1548                 svm->vmcb->save.cpl
1549                         = (svm->vmcb->save.cs.attrib
1550                            >> SVM_SELECTOR_DPL_SHIFT) & 3;
1551 
1552         mark_dirty(svm->vmcb, VMCB_SEG);
1553 }
1554 
1555 static void update_bp_intercept(struct kvm_vcpu *vcpu)
1556 {
1557         struct vcpu_svm *svm = to_svm(vcpu);
1558 
1559         clr_exception_intercept(svm, BP_VECTOR);
1560 
1561         if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) {
1562                 if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
1563                         set_exception_intercept(svm, BP_VECTOR);
1564         } else
1565                 vcpu->guest_debug = 0;
1566 }
1567 
1568 static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
1569 {
1570         struct vcpu_svm *svm = to_svm(vcpu);
1571 
1572         if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
1573                 svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
1574         else
1575                 svm->vmcb->save.dr7 = vcpu->arch.dr7;
1576 
1577         mark_dirty(svm->vmcb, VMCB_DR);
1578 
1579         update_bp_intercept(vcpu);
1580 }
1581 
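     /*
      * Hand out a fresh ASID from the per-CPU pool.  When the pool is
      * exhausted, bump the generation counter and request a full TLB flush so
      * stale translations from recycled ASIDs cannot survive.
      */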
1582 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
1583 {
1584         if (sd->next_asid > sd->max_asid) {
1585                 ++sd->asid_generation;
1586                 sd->next_asid = 1;
1587                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ALL_ASID;
1588         }
1589 
1590         svm->asid_generation = sd->asid_generation;
1591         svm->vmcb->control.asid = sd->next_asid++;
1592 
1593         mark_dirty(svm->vmcb, VMCB_ASID);
1594 }
1595 
1596 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
1597 {
1598         struct vcpu_svm *svm = to_svm(vcpu);
1599 
1600         svm->vmcb->save.dr7 = value;
1601         mark_dirty(svm->vmcb, VMCB_DR);
1602 }
1603 
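     /*
      * A #PF intercept is either an ordinary guest page fault, which is
      * handed to the KVM MMU, or an async page fault notification (apf_reason
      * set via the paravirt protocol), which suspends or wakes the task that
      * is waiting on the page.
      */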
1604 static int pf_interception(struct vcpu_svm *svm)
1605 {
1606         u64 fault_address = svm->vmcb->control.exit_info_2;
1607         u32 error_code;
1608         int r = 1;
1609 
1610         switch (svm->apf_reason) {
1611         default:
1612                 error_code = svm->vmcb->control.exit_info_1;
1613 
1614                 trace_kvm_page_fault(fault_address, error_code);
1615                 if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
1616                         kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
1617                 r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
1618                         svm->vmcb->control.insn_bytes,
1619                         svm->vmcb->control.insn_len);
1620                 break;
1621         case KVM_PV_REASON_PAGE_NOT_PRESENT:
1622                 svm->apf_reason = 0;
1623                 local_irq_disable();
1624                 kvm_async_pf_task_wait(fault_address, 0);
1625                 local_irq_enable();
1626                 break;
1627         case KVM_PV_REASON_PAGE_READY:
1628                 svm->apf_reason = 0;
1629                 local_irq_disable();
1630                 kvm_async_pf_task_wake(fault_address);
1631                 local_irq_enable();
1632                 break;
1633         }
1634         return r;
1635 }
1636 
1637 static int db_interception(struct vcpu_svm *svm)
1638 {
1639         struct kvm_run *kvm_run = svm->vcpu.run;
1640 
1641         if (!(svm->vcpu.guest_debug &
1642               (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) &&
1643                 !svm->nmi_singlestep) {
1644                 kvm_queue_exception(&svm->vcpu, DB_VECTOR);
1645                 return 1;
1646         }
1647 
1648         if (svm->nmi_singlestep) {
1649                 svm->nmi_singlestep = false;
1650                 if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
1651                         svm->vmcb->save.rflags &=
1652                                 ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
1653         }
1654 
1655         if (svm->vcpu.guest_debug &
1656             (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) {
1657                 kvm_run->exit_reason = KVM_EXIT_DEBUG;
1658                 kvm_run->debug.arch.pc =
1659                         svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1660                 kvm_run->debug.arch.exception = DB_VECTOR;
1661                 return 0;
1662         }
1663 
1664         return 1;
1665 }
1666 
1667 static int bp_interception(struct vcpu_svm *svm)
1668 {
1669         struct kvm_run *kvm_run = svm->vcpu.run;
1670 
1671         kvm_run->exit_reason = KVM_EXIT_DEBUG;
1672         kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip;
1673         kvm_run->debug.arch.exception = BP_VECTOR;
1674         return 0;
1675 }
1676 
1677 static int ud_interception(struct vcpu_svm *svm)
1678 {
1679         int er;
1680 
1681         er = emulate_instruction(&svm->vcpu, EMULTYPE_TRAP_UD);
1682         if (er != EMULATE_DONE)
1683                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1684         return 1;
1685 }
1686 
1687 static int ac_interception(struct vcpu_svm *svm)
1688 {
1689         kvm_queue_exception_e(&svm->vcpu, AC_VECTOR, 0);
1690         return 1;
1691 }
1692 
1693 static void svm_fpu_activate(struct kvm_vcpu *vcpu)
1694 {
1695         struct vcpu_svm *svm = to_svm(vcpu);
1696 
1697         clr_exception_intercept(svm, NM_VECTOR);
1698 
1699         svm->vcpu.fpu_active = 1;
1700         update_cr0_intercept(svm);
1701 }
1702 
1703 static int nm_interception(struct vcpu_svm *svm)
1704 {
1705         svm_fpu_activate(&svm->vcpu);
1706         return 1;
1707 }
1708 
1709 static bool is_erratum_383(void)
1710 {
1711         int err, i;
1712         u64 value;
1713 
1714         if (!erratum_383_found)
1715                 return false;
1716 
1717         value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
1718         if (err)
1719                 return false;
1720 
1721         /* Bit 62 may or may not be set for this mce */
1722         value &= ~(1ULL << 62);
1723 
1724         if (value != 0xb600000000010015ULL)
1725                 return false;
1726 
1727         /* Clear MCi_STATUS registers */
1728         for (i = 0; i < 6; ++i)
1729                 native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
1730 
1731         value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
1732         if (!err) {
1733                 u32 low, high;
1734 
1735                 value &= ~(1ULL << 2);
1736                 low    = lower_32_bits(value);
1737                 high   = upper_32_bits(value);
1738 
1739                 native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
1740         }
1741 
1742         /* Flush tlb to evict multi-match entries */
1743         __flush_tlb_all();
1744 
1745         return true;
1746 }
1747 
1748 static void svm_handle_mce(struct vcpu_svm *svm)
1749 {
1750         if (is_erratum_383()) {
1751                 /*
1752                  * Erratum 383 triggered. Guest state is corrupt so kill the
1753                  * guest.
1754                  */
1755                 pr_err("KVM: Guest triggered AMD Erratum 383\n");
1756 
1757                 kvm_make_request(KVM_REQ_TRIPLE_FAULT, &svm->vcpu);
1758 
1759                 return;
1760         }
1761 
1762         /*
1763          * On an #MC intercept the MCE handler is not called automatically in
1764          * the host. So do it by hand here.
1765          */
1766         asm volatile (
1767                 "int $0x12\n");
1768         /* not sure if we ever come back to this point */
1769 
1770         return;
1771 }
1772 
1773 static int mc_interception(struct vcpu_svm *svm)
1774 {
1775         return 1;
1776 }
1777 
1778 static int shutdown_interception(struct vcpu_svm *svm)
1779 {
1780         struct kvm_run *kvm_run = svm->vcpu.run;
1781 
1782         /*
1783          * VMCB is undefined after a SHUTDOWN intercept
1784          * so reinitialize it.
1785          */
1786         clear_page(svm->vmcb);
1787         init_vmcb(svm);
1788 
1789         kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
1790         return 0;
1791 }
1792 
1793 static int io_interception(struct vcpu_svm *svm)
1794 {
1795         struct kvm_vcpu *vcpu = &svm->vcpu;
1796         u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
1797         int size, in, string;
1798         unsigned port;
1799 
1800         ++svm->vcpu.stat.io_exits;
1801         string = (io_info & SVM_IOIO_STR_MASK) != 0;
1802         in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
1803         if (string || in)
1804                 return emulate_instruction(vcpu, 0) == EMULATE_DONE;
1805 
1806         port = io_info >> 16;
1807         size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
1808         svm->next_rip = svm->vmcb->control.exit_info_2;
1809         skip_emulated_instruction(&svm->vcpu);
1810 
1811         return kvm_fast_pio_out(vcpu, size, port);
1812 }
1813 
1814 static int nmi_interception(struct vcpu_svm *svm)
1815 {
1816         return 1;
1817 }
1818 
1819 static int intr_interception(struct vcpu_svm *svm)
1820 {
1821         ++svm->vcpu.stat.irq_exits;
1822         return 1;
1823 }
1824 
1825 static int nop_on_interception(struct vcpu_svm *svm)
1826 {
1827         return 1;
1828 }
1829 
1830 static int halt_interception(struct vcpu_svm *svm)
1831 {
1832         svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
1833         skip_emulated_instruction(&svm->vcpu);
1834         return kvm_emulate_halt(&svm->vcpu);
1835 }
1836 
1837 static int vmmcall_interception(struct vcpu_svm *svm)
1838 {
1839         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
1840         skip_emulated_instruction(&svm->vcpu);
1841         kvm_emulate_hypercall(&svm->vcpu);
1842         return 1;
1843 }
1844 
1845 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
1846 {
1847         struct vcpu_svm *svm = to_svm(vcpu);
1848 
1849         return svm->nested.nested_cr3;
1850 }
1851 
1852 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
1853 {
1854         struct vcpu_svm *svm = to_svm(vcpu);
1855         u64 cr3 = svm->nested.nested_cr3;
1856         u64 pdpte;
1857         int ret;
1858 
1859         ret = kvm_read_guest_page(vcpu->kvm, gpa_to_gfn(cr3), &pdpte,
1860                                   offset_in_page(cr3) + index * 8, 8);
1861         if (ret)
1862                 return 0;
1863         return pdpte;
1864 }
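
     /*
      * Worked example for the PDPTE read above (hypothetical value): with
      * nested_cr3 = 0x7fe0 and index = 2, the 8-byte entry is read from guest
      * physical address
      *
      *     (0x7fe0 & PAGE_MASK) + offset_in_page(0x7fe0) + 2 * 8 = 0x7ff0
      *
      * which matches the layout of the four 8-byte PDPTEs that a 32-byte
      * aligned PAE CR3 points at.
      */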
1865 
1866 static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu,
1867                                    unsigned long root)
1868 {
1869         struct vcpu_svm *svm = to_svm(vcpu);
1870 
1871         svm->vmcb->control.nested_cr3 = root;
1872         mark_dirty(svm->vmcb, VMCB_NPT);
1873         svm_flush_tlb(vcpu);
1874 }
1875 
1876 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
1877                                        struct x86_exception *fault)
1878 {
1879         struct vcpu_svm *svm = to_svm(vcpu);
1880 
1881         svm->vmcb->control.exit_code = SVM_EXIT_NPF;
1882         svm->vmcb->control.exit_code_hi = 0;
1883         svm->vmcb->control.exit_info_1 = fault->error_code;
1884         svm->vmcb->control.exit_info_2 = fault->address;
1885 
1886         nested_svm_vmexit(svm);
1887 }
1888 
1889 static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
1890 {
1891         int r;
1892 
1893         r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
1894 
1895         vcpu->arch.mmu.set_cr3           = nested_svm_set_tdp_cr3;
1896         vcpu->arch.mmu.get_cr3           = nested_svm_get_tdp_cr3;
1897         vcpu->arch.mmu.get_pdptr         = nested_svm_get_tdp_pdptr;
1898         vcpu->arch.mmu.inject_page_fault = nested_svm_inject_npf_exit;
1899         vcpu->arch.mmu.shadow_root_level = get_npt_level();
1900         vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
1901 
1902         return r;
1903 }
1904 
1905 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
1906 {
1907         vcpu->arch.walk_mmu = &vcpu->arch.mmu;
1908 }
1909 
1910 static int nested_svm_check_permissions(struct vcpu_svm *svm)
1911 {
1912         if (!(svm->vcpu.arch.efer & EFER_SVME)
1913             || !is_paging(&svm->vcpu)) {
1914                 kvm_queue_exception(&svm->vcpu, UD_VECTOR);
1915                 return 1;
1916         }
1917 
1918         if (svm->vmcb->save.cpl) {
1919                 kvm_inject_gp(&svm->vcpu, 0);
1920                 return 1;
1921         }
1922 
1923         return 0;
1924 }
1925 
1926 static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
1927                                       bool has_error_code, u32 error_code)
1928 {
1929         int vmexit;
1930 
1931         if (!is_guest_mode(&svm->vcpu))
1932                 return 0;
1933 
1934         svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
1935         svm->vmcb->control.exit_code_hi = 0;
1936         svm->vmcb->control.exit_info_1 = error_code;
1937         svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
1938 
1939         vmexit = nested_svm_intercept(svm);
1940         if (vmexit == NESTED_EXIT_DONE)
1941                 svm->nested.exit_required = true;
1942 
1943         return vmexit;
1944 }
1945 
1946 /* This function returns true if it is safe to enable the irq window */
1947 static inline bool nested_svm_intr(struct vcpu_svm *svm)
1948 {
1949         if (!is_guest_mode(&svm->vcpu))
1950                 return true;
1951 
1952         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
1953                 return true;
1954 
1955         if (!(svm->vcpu.arch.hflags & HF_HIF_MASK))
1956                 return false;
1957 
1958         /*
1959          * if vmexit was already requested (by intercepted exception
1960          * for instance) do not overwrite it with "external interrupt"
1961          * vmexit.
1962          */
1963         if (svm->nested.exit_required)
1964                 return false;
1965 
1966         svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
1967         svm->vmcb->control.exit_info_1 = 0;
1968         svm->vmcb->control.exit_info_2 = 0;
1969 
1970         if (svm->nested.intercept & 1ULL) {
1971                 /*
1972                  * The #vmexit can't be emulated here directly because this
1973                  * code path runs with irqs and preemption disabled. A
1974                  * #vmexit emulation might sleep. Only signal request for
1975                  * the #vmexit here.
1976                  */
1977                 svm->nested.exit_required = true;
1978                 trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1979                 return false;
1980         }
1981 
1982         return true;
1983 }
1984 
1985 /* This function returns true if it is safe to enable the nmi window */
1986 static inline bool nested_svm_nmi(struct vcpu_svm *svm)
1987 {
1988         if (!is_guest_mode(&svm->vcpu))
1989                 return true;
1990 
1991         if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI)))
1992                 return true;
1993 
1994         svm->vmcb->control.exit_code = SVM_EXIT_NMI;
1995         svm->nested.exit_required = true;
1996 
1997         return false;
1998 }
1999 
2000 static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
2001 {
2002         struct page *page;
2003 
2004         might_sleep();
2005 
2006         page = gfn_to_page(svm->vcpu.kvm, gpa >> PAGE_SHIFT);
2007         if (is_error_page(page))
2008                 goto error;
2009 
2010         *_page = page;
2011 
2012         return kmap(page);
2013 
2014 error:
2015         kvm_release_page_clean(page);
2016         kvm_inject_gp(&svm->vcpu, 0);
2017 
2018         return NULL;
2019 }
2020 
2021 static void nested_svm_unmap(struct page *page)
2022 {
2023         kunmap(page);
2024         kvm_release_page_dirty(page);
2025 }
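
     /*
      * nested_svm_map()/nested_svm_unmap() bracket direct accesses to a
      * guest-owned VMCB page.  The mapping goes through kmap() and may sleep
      * (hence the might_sleep() above), so callers run in non-atomic context.
      * The usage pattern mirrored by the VMLOAD/VMSAVE/VMRUN handlers further
      * down is roughly:
      *
      *     nested_vmcb = nested_svm_map(svm, gpa, &page);
      *     if (!nested_vmcb)
      *             return 1;
      *     ... access nested_vmcb ...
      *     nested_svm_unmap(page);
      */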
2026 
2027 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
2028 {
2029         unsigned port;
2030         u8 val, bit;
2031         u64 gpa;
2032 
2033         if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
2034                 return NESTED_EXIT_HOST;
2035 
2036         port = svm->vmcb->control.exit_info_1 >> 16;
2037         gpa  = svm->nested.vmcb_iopm + (port / 8);
2038         bit  = port % 8;
2039         val  = 0;
2040 
2041         if (kvm_read_guest(svm->vcpu.kvm, gpa, &val, 1))
2042                 return NESTED_EXIT_DONE;
2043 
2044         return (val & (1 << bit)) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2045 }
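
     /*
      * The IOPM lookup above uses one permission bit per I/O port, so a port
      * maps to byte (port / 8), bit (port % 8) of the nested IOPM.  Worked
      * example with a hypothetical port: port 0x3f8 is checked against bit 0
      * of the byte at svm->nested.vmcb_iopm + 0x7f.
      */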
2046 
2047 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
2048 {
2049         u32 offset, msr, value;
2050         int write, mask;
2051 
2052         if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2053                 return NESTED_EXIT_HOST;
2054 
2055         msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2056         offset = svm_msrpm_offset(msr);
2057         write  = svm->vmcb->control.exit_info_1 & 1;
2058         mask   = 1 << ((2 * (msr & 0xf)) + write);
2059 
2060         if (offset == MSR_INVALID)
2061                 return NESTED_EXIT_DONE;
2062 
2063         /* Offset is in 32-bit units but we need it in byte units */
2064         offset *= 4;
2065 
2066         if (kvm_read_guest(svm->vcpu.kvm, svm->nested.vmcb_msrpm + offset, &value, 4))
2067                 return NESTED_EXIT_DONE;
2068 
2069         return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
2070 }
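
     /*
      * MSRPM layout assumed by the check above: each MSR owns two consecutive
      * bits (read intercept, then write intercept) and svm_msrpm_offset()
      * turns the MSR index into a 32-bit-word offset into the bitmap.  Worked
      * example with a hypothetical MSR whose index ends in 0x3: its write
      * intercept is tested with mask = 1 << (2 * 3 + 1) = 0x80 against the
      * word read from the nested MSRPM.
      */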
2071 
2072 static int nested_svm_exit_special(struct vcpu_svm *svm)
2073 {
2074         u32 exit_code = svm->vmcb->control.exit_code;
2075 
2076         switch (exit_code) {
2077         case SVM_EXIT_INTR:
2078         case SVM_EXIT_NMI:
2079         case SVM_EXIT_EXCP_BASE + MC_VECTOR:
2080                 return NESTED_EXIT_HOST;
2081         case SVM_EXIT_NPF:
2082                 /* For now we are always handling NPFs when using them */
2083                 if (npt_enabled)
2084                         return NESTED_EXIT_HOST;
2085                 break;
2086         case SVM_EXIT_EXCP_BASE + PF_VECTOR:
2087                 /* When we're shadowing, trap PFs, but not async PF */
2088                 if (!npt_enabled && svm->apf_reason == 0)
2089                         return NESTED_EXIT_HOST;
2090                 break;
2091         case SVM_EXIT_EXCP_BASE + NM_VECTOR:
2092                 nm_interception(svm);
2093                 break;
2094         default:
2095                 break;
2096         }
2097 
2098         return NESTED_EXIT_CONTINUE;
2099 }
2100 
2101 /*
2102  * Returns NESTED_EXIT_DONE if the L1 guest intercepts this #vmexit.
2103  */
2104 static int nested_svm_intercept(struct vcpu_svm *svm)
2105 {
2106         u32 exit_code = svm->vmcb->control.exit_code;
2107         int vmexit = NESTED_EXIT_HOST;
2108 
2109         switch (exit_code) {
2110         case SVM_EXIT_MSR:
2111                 vmexit = nested_svm_exit_handled_msr(svm);
2112                 break;
2113         case SVM_EXIT_IOIO:
2114                 vmexit = nested_svm_intercept_ioio(svm);
2115                 break;
2116         case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
2117                 u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
2118                 if (svm->nested.intercept_cr & bit)
2119                         vmexit = NESTED_EXIT_DONE;
2120                 break;
2121         }
2122         case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
2123                 u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
2124                 if (svm->nested.intercept_dr & bit)
2125                         vmexit = NESTED_EXIT_DONE;
2126                 break;
2127         }
2128         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
2129                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
2130                 if (svm->nested.intercept_exceptions & excp_bits)
2131                         vmexit = NESTED_EXIT_DONE;
2132                 /* async page fault always causes a vmexit */
2133                 else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
2134                          svm->apf_reason != 0)
2135                         vmexit = NESTED_EXIT_DONE;
2136                 break;
2137         }
2138         case SVM_EXIT_ERR: {
2139                 vmexit = NESTED_EXIT_DONE;
2140                 break;
2141         }
2142         default: {
2143                 u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
2144                 if (svm->nested.intercept & exit_bits)
2145                         vmexit = NESTED_EXIT_DONE;
2146         }
2147         }
2148 
2149         return vmexit;
2150 }
2151 
2152 static int nested_svm_exit_handled(struct vcpu_svm *svm)
2153 {
2154         int vmexit;
2155 
2156         vmexit = nested_svm_intercept(svm);
2157 
2158         if (vmexit == NESTED_EXIT_DONE)
2159                 nested_svm_vmexit(svm);
2160 
2161         return vmexit;
2162 }
2163 
2164 static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
2165 {
2166         struct vmcb_control_area *dst  = &dst_vmcb->control;
2167         struct vmcb_control_area *from = &from_vmcb->control;
2168 
2169         dst->intercept_cr         = from->intercept_cr;
2170         dst->intercept_dr         = from->intercept_dr;
2171         dst->intercept_exceptions = from->intercept_exceptions;
2172         dst->intercept            = from->intercept;
2173         dst->iopm_base_pa         = from->iopm_base_pa;
2174         dst->msrpm_base_pa        = from->msrpm_base_pa;
2175         dst->tsc_offset           = from->tsc_offset;
2176         dst->asid                 = from->asid;
2177         dst->tlb_ctl              = from->tlb_ctl;
2178         dst->int_ctl              = from->int_ctl;
2179         dst->int_vector           = from->int_vector;
2180         dst->int_state            = from->int_state;
2181         dst->exit_code            = from->exit_code;
2182         dst->exit_code_hi         = from->exit_code_hi;
2183         dst->exit_info_1          = from->exit_info_1;
2184         dst->exit_info_2          = from->exit_info_2;
2185         dst->exit_int_info        = from->exit_int_info;
2186         dst->exit_int_info_err    = from->exit_int_info_err;
2187         dst->nested_ctl           = from->nested_ctl;
2188         dst->event_inj            = from->event_inj;
2189         dst->event_inj_err        = from->event_inj_err;
2190         dst->nested_cr3           = from->nested_cr3;
2191         dst->lbr_ctl              = from->lbr_ctl;
2192 }
2193 
2194 static int nested_svm_vmexit(struct vcpu_svm *svm)
2195 {
2196         struct vmcb *nested_vmcb;
2197         struct vmcb *hsave = svm->nested.hsave;
2198         struct vmcb *vmcb = svm->vmcb;
2199         struct page *page;
2200 
2201         trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
2202                                        vmcb->control.exit_info_1,
2203                                        vmcb->control.exit_info_2,
2204                                        vmcb->control.exit_int_info,
2205                                        vmcb->control.exit_int_info_err,
2206                                        KVM_ISA_SVM);
2207 
2208         nested_vmcb = nested_svm_map(svm, svm->nested.vmcb, &page);
2209         if (!nested_vmcb)
2210                 return 1;
2211 
2212         /* Exit Guest-Mode */
2213         leave_guest_mode(&svm->vcpu);
2214         svm->nested.vmcb = 0;
2215 
2216         /* Give the current vmcb to the guest */
2217         disable_gif(svm);
2218 
2219         nested_vmcb->save.es     = vmcb->save.es;
2220         nested_vmcb->save.cs     = vmcb->save.cs;
2221         nested_vmcb->save.ss     = vmcb->save.ss;
2222         nested_vmcb->save.ds     = vmcb->save.ds;
2223         nested_vmcb->save.gdtr   = vmcb->save.gdtr;
2224         nested_vmcb->save.idtr   = vmcb->save.idtr;
2225         nested_vmcb->save.efer   = svm->vcpu.arch.efer;
2226         nested_vmcb->save.cr0    = kvm_read_cr0(&svm->vcpu);
2227         nested_vmcb->save.cr3    = kvm_read_cr3(&svm->vcpu);
2228         nested_vmcb->save.cr2    = vmcb->save.cr2;
2229         nested_vmcb->save.cr4    = svm->vcpu.arch.cr4;
2230         nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
2231         nested_vmcb->save.rip    = vmcb->save.rip;
2232         nested_vmcb->save.rsp    = vmcb->save.rsp;
2233         nested_vmcb->save.rax    = vmcb->save.rax;
2234         nested_vmcb->save.dr7    = vmcb->save.dr7;
2235         nested_vmcb->save.dr6    = vmcb->save.dr6;
2236         nested_vmcb->save.cpl    = vmcb->save.cpl;
2237 
2238         nested_vmcb->control.int_ctl           = vmcb->control.int_ctl;
2239         nested_vmcb->control.int_vector        = vmcb->control.int_vector;
2240         nested_vmcb->control.int_state         = vmcb->control.int_state;
2241         nested_vmcb->control.exit_code         = vmcb->control.exit_code;
2242         nested_vmcb->control.exit_code_hi      = vmcb->control.exit_code_hi;
2243         nested_vmcb->control.exit_info_1       = vmcb->control.exit_info_1;
2244         nested_vmcb->control.exit_info_2       = vmcb->control.exit_info_2;
2245         nested_vmcb->control.exit_int_info     = vmcb->control.exit_int_info;
2246         nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;
2247         nested_vmcb->control.next_rip          = vmcb->control.next_rip;
2248 
2249         /*
2250          * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
2251          * to make sure that we do not lose injected events. So check event_inj
2252          * here and copy it to exit_int_info if it is valid.
2253          * Exit_int_info and event_inj can't be both valid because the case
2254          * below only happens on a VMRUN instruction intercept which has
2255          * no valid exit_int_info set.
2256          */
2257         if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
2258                 struct vmcb_control_area *nc = &nested_vmcb->control;
2259 
2260                 nc->exit_int_info     = vmcb->control.event_inj;
2261                 nc->exit_int_info_err = vmcb->control.event_inj_err;
2262         }
2263 
2264         nested_vmcb->control.tlb_ctl           = 0;
2265         nested_vmcb->control.event_inj         = 0;
2266         nested_vmcb->control.event_inj_err     = 0;
2267 
2268         /* We always set V_INTR_MASKING and remember the old value in hflags */
2269         if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
2270                 nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
2271 
2272         /* Restore the original control entries */
2273         copy_vmcb_control_area(vmcb, hsave);
2274 
2275         kvm_clear_exception_queue(&svm->vcpu);
2276         kvm_clear_interrupt_queue(&svm->vcpu);
2277 
2278         svm->nested.nested_cr3 = 0;
2279 
2280         /* Restore selected save entries */
2281         svm->vmcb->save.es = hsave->save.es;
2282         svm->vmcb->save.cs = hsave->save.cs;
2283         svm->vmcb->save.ss = hsave->save.ss;
2284         svm->vmcb->save.ds = hsave->save.ds;
2285         svm->vmcb->save.gdtr = hsave->save.gdtr;
2286         svm->vmcb->save.idtr = hsave->save.idtr;
2287         kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
2288         svm_set_efer(&svm->vcpu, hsave->save.efer);
2289         svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
2290         svm_set_cr4(&svm->vcpu, hsave->save.cr4);
2291         if (npt_enabled) {
2292                 svm->vmcb->save.cr3 = hsave->save.cr3;
2293                 svm->vcpu.arch.cr3 = hsave->save.cr3;
2294         } else {
2295                 (void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
2296         }
2297         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, hsave->save.rax);
2298         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, hsave->save.rsp);
2299         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, hsave->save.rip);
2300         svm->vmcb->save.dr7 = 0;
2301         svm->vmcb->save.cpl = 0;
2302         svm->vmcb->control.exit_int_info = 0;
2303 
2304         mark_all_dirty(svm->vmcb);
2305 
2306         nested_svm_unmap(page);
2307 
2308         nested_svm_uninit_mmu_context(&svm->vcpu);
2309         kvm_mmu_reset_context(&svm->vcpu);
2310         kvm_mmu_load(&svm->vcpu);
2311 
2312         return 0;
2313 }
2314 
2315 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
2316 {
2317         /*
2318          * This function merges the msr permission bitmaps of kvm and the
2319          * nested vmcb. It is optimized in that it only merges the parts where
2320          * the kvm msr permission bitmap may contain zero bits
2321          */
2322         int i;
2323 
2324         if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
2325                 return true;
2326 
2327         for (i = 0; i < MSRPM_OFFSETS; i++) {
2328                 u32 value, p;
2329                 u64 offset;
2330 
2331                 if (msrpm_offsets[i] == 0xffffffff)
2332                         break;
2333 
2334                 p      = msrpm_offsets[i];
2335                 offset = svm->nested.vmcb_msrpm + (p * 4);
2336 
2337                 if (kvm_read_guest(svm->vcpu.kvm, offset, &value, 4))
2338                         return false;
2339 
2340                 svm->nested.msrpm[p] = svm->msrpm[p] | value;
2341         }
2342 
2343         svm->vmcb->control.msrpm_base_pa = __pa(svm->nested.msrpm);
2344 
2345         return true;
2346 }
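
     /*
      * The merge above ORs KVM's own MSR permission bits with the bits the L1
      * guest installed, so an access is intercepted whenever either side wants
      * the intercept.  Rough example (hypothetical word values): if
      * svm->msrpm[p] == 0x1 and the word read from the nested bitmap is 0x4,
      * the merged word becomes 0x5 and both intercepts stay armed.
      */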
2347 
2348 static bool nested_vmcb_checks(struct vmcb *vmcb)
2349 {
2350         if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
2351                 return false;
2352 
2353         if (vmcb->control.asid == 0)
2354                 return false;
2355 
2356         if (vmcb->control.nested_ctl && !npt_enabled)
2357                 return false;
2358 
2359         return true;
2360 }
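
     /*
      * The checks above reject a nested VMCB that does not intercept VMRUN or
      * that carries ASID 0 (both of which the hardware would also refuse to
      * run), and refuse nested paging for L1 unless npt is enabled on the
      * host side here.
      */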
2361 
2362 static bool nested_svm_vmrun(struct vcpu_svm *svm)
2363 {
2364         struct vmcb *nested_vmcb;
2365         struct vmcb *hsave = svm->nested.hsave;
2366         struct vmcb *vmcb = svm->vmcb;
2367         struct page *page;
2368         u64 vmcb_gpa;
2369 
2370         vmcb_gpa = svm->vmcb->save.rax;
2371 
2372         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2373         if (!nested_vmcb)
2374                 return false;
2375 
2376         if (!nested_vmcb_checks(nested_vmcb)) {
2377                 nested_vmcb->control.exit_code    = SVM_EXIT_ERR;
2378                 nested_vmcb->control.exit_code_hi = 0;
2379                 nested_vmcb->control.exit_info_1  = 0;
2380                 nested_vmcb->control.exit_info_2  = 0;
2381 
2382                 nested_svm_unmap(page);
2383 
2384                 return false;
2385         }
2386 
2387         trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
2388                                nested_vmcb->save.rip,
2389                                nested_vmcb->control.int_ctl,
2390                                nested_vmcb->control.event_inj,
2391                                nested_vmcb->control.nested_ctl);
2392 
2393         trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
2394                                     nested_vmcb->control.intercept_cr >> 16,
2395                                     nested_vmcb->control.intercept_exceptions,
2396                                     nested_vmcb->control.intercept);
2397 
2398         /* Clear internal status */
2399         kvm_clear_exception_queue(&svm->vcpu);
2400         kvm_clear_interrupt_queue(&svm->vcpu);
2401 
2402         /*
2403          * Save the old vmcb, so we don't need to pick what we save, but can
2404          * restore everything when a VMEXIT occurs
2405          */
2406         hsave->save.es     = vmcb->save.es;
2407         hsave->save.cs     = vmcb->save.cs;
2408         hsave->save.ss     = vmcb->save.ss;
2409         hsave->save.ds     = vmcb->save.ds;
2410         hsave->save.gdtr   = vmcb->save.gdtr;
2411         hsave->save.idtr   = vmcb->save.idtr;
2412         hsave->save.efer   = svm->vcpu.arch.efer;
2413         hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
2414         hsave->save.cr4    = svm->vcpu.arch.cr4;
2415         hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
2416         hsave->save.rip    = kvm_rip_read(&svm->vcpu);
2417         hsave->save.rsp    = vmcb->save.rsp;
2418         hsave->save.rax    = vmcb->save.rax;
2419         if (npt_enabled)
2420                 hsave->save.cr3    = vmcb->save.cr3;
2421         else
2422                 hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
2423 
2424         copy_vmcb_control_area(hsave, vmcb);
2425 
2426         if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
2427                 svm->vcpu.arch.hflags |= HF_HIF_MASK;
2428         else
2429                 svm->vcpu.arch.hflags &= ~HF_HIF_MASK;
2430 
2431         if (nested_vmcb->control.nested_ctl) {
2432                 kvm_mmu_unload(&svm->vcpu);
2433                 svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
2434                 nested_svm_init_mmu_context(&svm->vcpu);
2435         }
2436 
2437         /* Load the nested guest state */
2438         svm->vmcb->save.es = nested_vmcb->save.es;
2439         svm->vmcb->save.cs = nested_vmcb->save.cs;
2440         svm->vmcb->save.ss = nested_vmcb->save.ss;
2441         svm->vmcb->save.ds = nested_vmcb->save.ds;
2442         svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
2443         svm->vmcb->save.idtr = nested_vmcb->save.idtr;
2444         kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
2445         svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
2446         svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
2447         svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
2448         if (npt_enabled) {
2449                 svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
2450                 svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
2451         } else
2452                 (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);
2453 
2454         /* Guest paging mode is active - reset mmu */
2455         kvm_mmu_reset_context(&svm->vcpu);
2456 
2457         svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
2458         kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, nested_vmcb->save.rax);
2459         kvm_register_write(&svm->vcpu, VCPU_REGS_RSP, nested_vmcb->save.rsp);
2460         kvm_register_write(&svm->vcpu, VCPU_REGS_RIP, nested_vmcb->save.rip);
2461 
2462         /* In case we don't even reach vcpu_run, the fields are not updated */
2463         svm->vmcb->save.rax = nested_vmcb->save.rax;
2464         svm->vmcb->save.rsp = nested_vmcb->save.rsp;
2465         svm->vmcb->save.rip = nested_vmcb->save.rip;
2466         svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
2467         svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
2468         svm->vmcb->save.cpl = nested_vmcb->save.cpl;
2469 
2470         svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
2471         svm->nested.vmcb_iopm  = nested_vmcb->control.iopm_base_pa  & ~0x0fffULL;
2472 
2473         /* cache intercepts */
2474         svm->nested.intercept_cr         = nested_vmcb->control.intercept_cr;
2475         svm->nested.intercept_dr         = nested_vmcb->control.intercept_dr;
2476         svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
2477         svm->nested.intercept            = nested_vmcb->control.intercept;
2478 
2479         svm_flush_tlb(&svm->vcpu);
2480         svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
2481         if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
2482                 svm->vcpu.arch.hflags |= HF_VINTR_MASK;
2483         else
2484                 svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;
2485 
2486         if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
2487                 /* We only want the cr8 intercept bits of the guest */
2488                 clr_cr_intercept(svm, INTERCEPT_CR8_READ);
2489                 clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
2490         }
2491 
2492         /* We don't want to see VMMCALLs from a nested guest */
2493         clr_intercept(svm, INTERCEPT_VMMCALL);
2494 
2495         svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
2496         svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
2497         svm->vmcb->control.int_state = nested_vmcb->control.int_state;
2498         svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
2499         svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
2500         svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;
2501 
2502         nested_svm_unmap(page);
2503 
2504         /* Enter Guest-Mode */
2505         enter_guest_mode(&svm->vcpu);
2506 
2507         /*
2508          * Merge guest and host intercepts - must be called with vcpu in
2509          * guest-mode to take effect here
2510          */
2511         recalc_intercepts(svm);
2512 
2513         svm->nested.vmcb = vmcb_gpa;
2514 
2515         enable_gif(svm);
2516 
2517         mark_all_dirty(svm->vmcb);
2518 
2519         return true;
2520 }
2521 
2522 static void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
2523 {
2524         to_vmcb->save.fs = from_vmcb->save.fs;
2525         to_vmcb->save.gs = from_vmcb->save.gs;
2526         to_vmcb->save.tr = from_vmcb->save.tr;
2527         to_vmcb->save.ldtr = from_vmcb->save.ldtr;
2528         to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
2529         to_vmcb->save.star = from_vmcb->save.star;
2530         to_vmcb->save.lstar = from_vmcb->save.lstar;
2531         to_vmcb->save.cstar = from_vmcb->save.cstar;
2532         to_vmcb->save.sfmask = from_vmcb->save.sfmask;
2533         to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
2534         to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
2535         to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
2536 }
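
     /*
      * nested_svm_vmloadsave() copies the state that the VMLOAD/VMSAVE
      * instructions transfer: FS, GS, TR and LDTR including their hidden
      * parts, plus KernelGSBase, the STAR/LSTAR/CSTAR/SFMASK syscall MSRs and
      * the three SYSENTER MSRs.  The caller picks the direction: vmload copies
      * nested_vmcb -> svm->vmcb, vmsave copies svm->vmcb -> nested_vmcb.
      */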
2537 
2538 static int vmload_interception(struct vcpu_svm *svm)
2539 {
2540         struct vmcb *nested_vmcb;
2541         struct page *page;
2542 
2543         if (nested_svm_check_permissions(svm))
2544                 return 1;
2545 
2546         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2547         if (!nested_vmcb)
2548                 return 1;
2549 
2550         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2551         skip_emulated_instruction(&svm->vcpu);
2552 
2553         nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
2554         nested_svm_unmap(page);
2555 
2556         return 1;
2557 }
2558 
2559 static int vmsave_interception(struct vcpu_svm *svm)
2560 {
2561         struct vmcb *nested_vmcb;
2562         struct page *page;
2563 
2564         if (nested_svm_check_permissions(svm))
2565                 return 1;
2566 
2567         nested_vmcb = nested_svm_map(svm, svm->vmcb->save.rax, &page);
2568         if (!nested_vmcb)
2569                 return 1;
2570 
2571         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2572         skip_emulated_instruction(&svm->vcpu);
2573 
2574         nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
2575         nested_svm_unmap(page);
2576 
2577         return 1;
2578 }
2579 
2580 static int vmrun_interception(struct vcpu_svm *svm)
2581 {
2582         if (nested_svm_check_permissions(svm))
2583                 return 1;
2584 
2585         /* Save rip after vmrun instruction */
2586         kvm_rip_write(&svm->vcpu, kvm_rip_read(&svm->vcpu) + 3);
2587 
2588         if (!nested_svm_vmrun(svm))
2589                 return 1;
2590 
2591         if (!nested_svm_vmrun_msrpm(svm))
2592                 goto failed;
2593 
2594         return 1;
2595 
2596 failed:
2597 
2598         svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
2599         svm->vmcb->control.exit_code_hi = 0;
2600         svm->vmcb->control.exit_info_1  = 0;
2601         svm->vmcb->control.exit_info_2  = 0;
2602 
2603         nested_svm_vmexit(svm);
2604 
2605         return 1;
2606 }
2607 
2608 static int stgi_interception(struct vcpu_svm *svm)
2609 {
2610         if (nested_svm_check_permissions(svm))
2611                 return 1;
2612 
2613         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2614         skip_emulated_instruction(&svm->vcpu);
2615         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
2616 
2617         enable_gif(svm);
2618 
2619         return 1;
2620 }
2621 
2622 static int clgi_interception(struct vcpu_svm *svm)
2623 {
2624         if (nested_svm_check_permissions(svm))
2625                 return 1;
2626 
2627         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2628         skip_emulated_instruction(&svm->vcpu);
2629 
2630         disable_gif(svm);
2631 
2632         /* After a CLGI no interrupts should come */
2633         svm_clear_vintr(svm);
2634         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
2635 
2636         mark_dirty(svm->vmcb, VMCB_INTR);
2637 
2638         return 1;
2639 }
2640 
2641 static int invlpga_interception(struct vcpu_svm *svm)
2642 {
2643         struct kvm_vcpu *vcpu = &svm->vcpu;
2644 
2645         trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
2646                           vcpu->arch.regs[VCPU_REGS_RAX]);
2647 
2648         /* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
2649         kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
2650 
2651         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2652         skip_emulated_instruction(&svm->vcpu);
2653         return 1;
2654 }
2655 
2656 static int skinit_interception(struct vcpu_svm *svm)
2657 {
2658         trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
2659 
2660         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2661         return 1;
2662 }
2663 
2664 static int xsetbv_interception(struct vcpu_svm *svm)
2665 {
2666         u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
2667         u32 index = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
2668 
2669         if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
2670                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
2671                 skip_emulated_instruction(&svm->vcpu);
2672         }
2673 
2674         return 1;
2675 }
2676 
2677 static int invalid_op_interception(struct vcpu_svm *svm)
2678 {
2679         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2680         return 1;
2681 }
2682 
2683 static int task_switch_interception(struct vcpu_svm *svm)
2684 {
2685         u16 tss_selector;
2686         int reason;
2687         int int_type = svm->vmcb->control.exit_int_info &
2688                 SVM_EXITINTINFO_TYPE_MASK;
2689         int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK;
2690         uint32_t type =
2691                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK;
2692         uint32_t idt_v =
2693                 svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID;
2694         bool has_error_code = false;
2695         u32 error_code = 0;
2696 
2697         tss_selector = (u16)svm->vmcb->control.exit_info_1;
2698 
2699         if (svm->vmcb->control.exit_info_2 &
2700             (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET))
2701                 reason = TASK_SWITCH_IRET;
2702         else if (svm->vmcb->control.exit_info_2 &
2703                  (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP))
2704                 reason = TASK_SWITCH_JMP;
2705         else if (idt_v)
2706                 reason = TASK_SWITCH_GATE;
2707         else
2708                 reason = TASK_SWITCH_CALL;
2709 
2710         if (reason == TASK_SWITCH_GATE) {
2711                 switch (type) {
2712                 case SVM_EXITINTINFO_TYPE_NMI:
2713                         svm->vcpu.arch.nmi_injected = false;
2714                         break;
2715                 case SVM_EXITINTINFO_TYPE_EXEPT:
2716                         if (svm->vmcb->control.exit_info_2 &
2717                             (1ULL << SVM_EXITINFOSHIFT_TS_HAS_ERROR_CODE)) {
2718                                 has_error_code = true;
2719                                 error_code =
2720                                         (u32)svm->vmcb->control.exit_info_2;
2721                         }
2722                         kvm_clear_exception_queue(&svm->vcpu);
2723                         break;
2724                 case SVM_EXITINTINFO_TYPE_INTR:
2725                         kvm_clear_interrupt_queue(&svm->vcpu);
2726                         break;
2727                 default:
2728                         break;
2729                 }
2730         }
2731 
2732         if (reason != TASK_SWITCH_GATE ||
2733             int_type == SVM_EXITINTINFO_TYPE_SOFT ||
2734             (int_type == SVM_EXITINTINFO_TYPE_EXEPT &&
2735              (int_vec == OF_VECTOR || int_vec == BP_VECTOR)))
2736                 skip_emulated_instruction(&svm->vcpu);
2737 
2738         if (kvm_task_switch(&svm->vcpu, tss_selector, reason,
2739                                 has_error_code, error_code) == EMULATE_FAIL) {
2740                 svm->vcpu.run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2741                 svm->vcpu.run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
2742                 svm->vcpu.run->internal.ndata = 0;
2743                 return 0;
2744         }
2745         return 1;
2746 }
2747 
2748 static int cpuid_interception(struct vcpu_svm *svm)
2749 {
2750         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
2751         kvm_emulate_cpuid(&svm->vcpu);
2752         return 1;
2753 }
2754 
2755 static int iret_interception(struct vcpu_svm *svm)
2756 {
2757         ++svm->vcpu.stat.nmi_window_exits;
2758         clr_intercept(svm, INTERCEPT_IRET);
2759         svm->vcpu.arch.hflags |= HF_IRET_MASK;
2760         svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu);
2761         return 1;
2762 }
2763 
2764 static int invlpg_interception(struct vcpu_svm *svm)
2765 {
2766         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2767                 return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2768 
2769         kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
2770         skip_emulated_instruction(&svm->vcpu);
2771         return 1;
2772 }
2773 
2774 static int emulate_on_interception(struct vcpu_svm *svm)
2775 {
2776         return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
2777 }
2778 
2779 bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
2780 {
2781         unsigned long cr0 = svm->vcpu.arch.cr0;
2782         bool ret = false;
2783         u64 intercept;
2784 
2785         intercept = svm->nested.intercept;
2786 
2787         if (!is_guest_mode(&svm->vcpu) ||
2788             (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))))
2789                 return false;
2790 
2791         cr0 &= ~SVM_CR0_SELECTIVE_MASK;
2792         val &= ~SVM_CR0_SELECTIVE_MASK;
2793 
2794         if (cr0 ^ val) {
2795                 svm->vmcb->control.exit_code = SVM_EXIT_CR0_SEL_WRITE;
2796                 ret = (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE);
2797         }
2798 
2799         return ret;
2800 }
2801 
2802 #define CR_VALID (1ULL << 63)
2803 
2804 static int cr_interception(struct vcpu_svm *svm)
2805 {
2806         int reg, cr;
2807         unsigned long val;
2808         int err;
2809 
2810         if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
2811                 return emulate_on_interception(svm);
2812 
2813         if (unlikely((svm->vmcb->control.exit_info_1 & CR_VALID) == 0))
2814                 return emulate_on_interception(svm);
2815 
2816         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2817         cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
2818 
2819         err = 0;
2820         if (cr >= 16) { /* mov to cr */
2821                 cr -= 16;
2822                 val = kvm_register_read(&svm->vcpu, reg);
2823                 switch (cr) {
2824                 case 0:
2825                         if (!check_selective_cr0_intercepted(svm, val))
2826                                 err = kvm_set_cr0(&svm->vcpu, val);
2827                         else
2828                                 return 1;
2829 
2830                         break;
2831                 case 3:
2832                         err = kvm_set_cr3(&svm->vcpu, val);
2833                         break;
2834                 case 4:
2835                         err = kvm_set_cr4(&svm->vcpu, val);
2836                         break;
2837                 case 8:
2838                         err = kvm_set_cr8(&svm->vcpu, val);
2839                         break;
2840                 default:
2841                         WARN(1, "unhandled write to CR%d", cr);
2842                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2843                         return 1;
2844                 }
2845         } else { /* mov from cr */
2846                 switch (cr) {
2847                 case 0:
2848                         val = kvm_read_cr0(&svm->vcpu);
2849                         break;
2850                 case 2:
2851                         val = svm->vcpu.arch.cr2;
2852                         break;
2853                 case 3:
2854                         val = kvm_read_cr3(&svm->vcpu);
2855                         break;
2856                 case 4:
2857                         val = kvm_read_cr4(&svm->vcpu);
2858                         break;
2859                 case 8:
2860                         val = kvm_get_cr8(&svm->vcpu);
2861                         break;
2862                 default:
2863                         WARN(1, "unhandled read from CR%d", cr);
2864                         kvm_queue_exception(&svm->vcpu, UD_VECTOR);
2865                         return 1;
2866                 }
2867                 kvm_register_write(&svm->vcpu, reg, val);
2868         }
2869         kvm_complete_insn_gp(&svm->vcpu, err);
2870 
2871         return 1;
2872 }
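
     /*
      * Decode-assist encoding relied on above: exit codes SVM_EXIT_READ_CR0 +
      * 0..15 are CR reads and + 16..31 are CR writes (hence the cr >= 16
      * test), and the low bits of exit_info_1 name the GPR once CR_VALID
      * (bit 63) signals that the decode assist supplied them.  Worked example
      * (hypothetical exit): exit_code = SVM_EXIT_READ_CR0 + 20 with
      * exit_info_1 = CR_VALID | 3 corresponds to "mov %rbx, %cr4".
      */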
2873 
2874 static int dr_interception(struct vcpu_svm *svm)
2875 {
2876         int reg, dr;
2877         unsigned long val;
2878         int err;
2879 
2880         if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS))
2881                 return emulate_on_interception(svm);
2882 
2883         reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
2884         dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
2885 
2886         if (dr >= 16) { /* mov to DRn */
2887                 val = kvm_register_read(&svm->vcpu, reg);
2888                 kvm_set_dr(&svm->vcpu, dr - 16, val);
2889         } else {
2890                 err = kvm_get_dr(&svm->vcpu, dr, &val);
2891                 if (!err)
2892                         kvm_register_write(&svm->vcpu, reg, val);
2893         }
2894 
2895         skip_emulated_instruction(&svm->vcpu);
2896 
2897         return 1;
2898 }
2899 
2900 static int cr8_write_interception(struct vcpu_svm *svm)
2901 {
2902         struct kvm_run *kvm_run = svm->vcpu.run;
2903         int r;
2904 
2905         u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
2906         /* instruction emulation calls kvm_set_cr8() */
2907         r = cr_interception(svm);
2908         if (irqchip_in_kernel(svm->vcpu.kvm))
2909                 return r;
2910         if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
2911                 return r;
2912         kvm_run->exit_reason = KVM_EXIT_SET_TPR;
2913         return 0;
2914 }
2915 
2916 u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu)
2917 {
2918         struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu));
2919         return vmcb->control.tsc_offset +
2920                 svm_scale_tsc(vcpu, native_read_tsc());
2921 }
2922 
2923 static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
2924 {
2925         struct vcpu_svm *svm = to_svm(vcpu);
2926 
2927         switch (ecx) {
2928         case MSR_IA32_TSC: {
2929                 *data = svm->vmcb->control.tsc_offset +
2930                         svm_scale_tsc(vcpu, native_read_tsc());
2931 
2932                 break;
2933         }
2934         case MSR_STAR:
2935                 *data = svm->vmcb->save.star;
2936                 break;
2937 #ifdef CONFIG_X86_64
2938         case MSR_LSTAR:
2939                 *data = svm->vmcb->save.lstar;
2940                 break;
2941         case MSR_CSTAR:
2942                 *data = svm->vmcb->save.cstar;
2943                 break;
2944         case MSR_KERNEL_GS_BASE:
2945                 *data = svm->vmcb->save.kernel_gs_base;
2946                 break;
2947         case MSR_SYSCALL_MASK:
2948                 *data = svm->vmcb->save.sfmask;
2949                 break;
2950 #endif
2951         case MSR_IA32_SYSENTER_CS:
2952                 *data = svm->vmcb->save.sysenter_cs;
2953                 break;
2954         case MSR_IA32_SYSENTER_EIP:
2955                 *data = svm->sysenter_eip;
2956                 break;
2957         case MSR_IA32_SYSENTER_ESP:
2958                 *data = svm->sysenter_esp;
2959                 break;
2960         /*
2961          * Nobody will change the following 5 values in the VMCB so we can
2962          * safely return them on rdmsr. They will always be 0 until LBRV is
2963          * implemented.
2964          */
2965         case MSR_IA32_DEBUGCTLMSR:
2966                 *data = svm->vmcb->save.dbgctl;
2967                 break;
2968         case MSR_IA32_LASTBRANCHFROMIP:
2969                 *data = svm->vmcb->save.br_from;
2970                 break;
2971         case MSR_IA32_LASTBRANCHTOIP:
2972                 *data = svm->vmcb->save.br_to;
2973                 break;
2974         case MSR_IA32_LASTINTFROMIP:
2975                 *data = svm->vmcb->save.last_excp_from;
2976                 break;
2977         case MSR_IA32_LASTINTTOIP:
2978                 *data = svm->vmcb->save.last_excp_to;
2979                 break;
2980         case MSR_VM_HSAVE_PA:
2981                 *data = svm->nested.hsave_msr;
2982                 break;
2983         case MSR_VM_CR:
2984                 *data = svm->nested.vm_cr_msr;
2985                 break;
2986         case MSR_IA32_UCODE_REV:
2987                 *data = 0x01000065;
2988                 break;
2989         default:
2990                 return kvm_get_msr_common(vcpu, ecx, data);
2991         }
2992         return 0;
2993 }
2994 
2995 static int rdmsr_interception(struct vcpu_svm *svm)
2996 {
2997         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
2998         u64 data;
2999 
3000         if (svm_get_msr(&svm->vcpu, ecx, &data)) {
3001                 trace_kvm_msr_read_ex(ecx);
3002                 kvm_inject_gp(&svm->vcpu, 0);
3003         } else {
3004                 trace_kvm_msr_read(ecx, data);
3005 
3006                 svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
3007                 svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
3008                 svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3009                 skip_emulated_instruction(&svm->vcpu);
3010         }
3011         return 1;
3012 }
3013 
3014 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
3015 {
3016         struct vcpu_svm *svm = to_svm(vcpu);
3017         int svm_dis, chg_mask;
3018 
3019         if (data & ~SVM_VM_CR_VALID_MASK)
3020                 return 1;
3021 
3022         chg_mask = SVM_VM_CR_VALID_MASK;
3023 
3024         if (svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK)
3025                 chg_mask &= ~(SVM_VM_CR_SVM_LOCK_MASK | SVM_VM_CR_SVM_DIS_MASK);
3026 
3027         svm->nested.vm_cr_msr &= ~chg_mask;
3028         svm->nested.vm_cr_msr |= (data & chg_mask);
3029 
3030         svm_dis = svm->nested.vm_cr_msr & SVM_VM_CR_SVM_DIS_MASK;
3031 
3032         /* check for svm_disable while efer.svme is set */
3033         if (svm_dis && (vcpu->arch.efer & EFER_SVME))
3034                 return 1;
3035 
3036         return 0;
3037 }
3038 
3039 static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data)
3040 {
3041         struct vcpu_svm *svm = to_svm(vcpu);
3042 
3043         switch (ecx) {
3044         case MSR_IA32_CR_PAT:
3045                 if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
3046                         return 1;
3047                 vcpu->arch.pat = data;
3048                 svm->vmcb->save.g_pat = data;
3049                 mark_dirty(svm->vmcb, VMCB_NPT);
3050                 break;
3051         case MSR_IA32_TSC:
3052                 kvm_write_tsc(vcpu, data);
3053                 break;
3054         case MSR_STAR:
3055                 svm->vmcb->save.star = data;
3056                 break;
3057 #ifdef CONFIG_X86_64
3058         case MSR_LSTAR:
3059                 svm->vmcb->save.lstar = data;
3060                 break;
3061         case MSR_CSTAR:
3062                 svm->vmcb->save.cstar = data;
3063                 break;
3064         case MSR_KERNEL_GS_BASE:
3065                 svm->vmcb->save.kernel_gs_base = data;
3066                 break;
3067         case MSR_SYSCALL_MASK:
3068                 svm->vmcb->save.sfmask = data;
3069                 break;
3070 #endif
3071         case MSR_IA32_SYSENTER_CS:
3072                 svm->vmcb->save.sysenter_cs = data;
3073                 break;
3074         case MSR_IA32_SYSENTER_EIP:
3075                 svm->sysenter_eip = data;
3076                 svm->vmcb->save.sysenter_eip = data;
3077                 break;
3078         case MSR_IA32_SYSENTER_ESP:
3079                 svm->sysenter_esp = data;
3080                 svm->vmcb->save.sysenter_esp = data;
3081                 break;
3082         case MSR_IA32_DEBUGCTLMSR:
3083                 if (!boot_cpu_has(X86_FEATURE_LBRV)) {
3084                         pr_unimpl(vcpu, "%s: MSR_IA32_DEBUGCTL 0x%llx, nop\n",
3085                                         __func__, data);
3086                         break;
3087                 }
3088                 if (data & DEBUGCTL_RESERVED_BITS)
3089                         return 1;
3090 
3091                 svm->vmcb->save.dbgctl = data;
3092                 mark_dirty(svm->vmcb, VMCB_LBR);
3093                 if (data & (1ULL<<0))
3094                         svm_enable_lbrv(svm);
3095                 else
3096                         svm_disable_lbrv(svm);
3097                 break;
3098         case MSR_VM_HSAVE_PA:
3099                 svm->nested.hsave_msr = data;
3100                 break;
3101         case MSR_VM_CR:
3102                 return svm_set_vm_cr(vcpu, data);
3103         case MSR_VM_IGNNE:
3104                 pr_unimpl(vcpu, "unimplemented wrmsr: 0x%x data 0x%llx\n", ecx, data);
3105                 break;
3106         default:
3107                 return kvm_set_msr_common(vcpu, ecx, data);
3108         }
3109         return 0;
3110 }
3111 
3112 static int wrmsr_interception(struct vcpu_svm *svm)
3113 {
3114         u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
3115         u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
3116                 | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
3117 
3118 
3119         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
3120         if (kvm_set_msr(&svm->vcpu, ecx, data)) {
3121                 trace_kvm_msr_write_ex(ecx, data);
3122                 kvm_inject_gp(&svm->vcpu, 0);
3123         } else {
3124                 trace_kvm_msr_write(ecx, data);
3125                 skip_emulated_instruction(&svm->vcpu);
3126         }
3127         return 1;
3128 }
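
     /*
      * As on bare metal, WRMSR takes the MSR index in ECX and the value in
      * EDX:EAX; the masking above just rebuilds the 64-bit value.  Worked
      * example with hypothetical register contents: EAX = 0x89abcdef and
      * EDX = 0x01234567 yield data = 0x0123456789abcdef.
      */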
3129 
3130 static int msr_interception(struct vcpu_svm *svm)
3131 {
3132         if (svm->vmcb->control.exit_info_1)
3133                 return wrmsr_interception(svm);
3134         else
3135                 return rdmsr_interception(svm);
3136 }
3137 
3138 static int interrupt_window_interception(struct vcpu_svm *svm)
3139 {
3140         struct kvm_run *kvm_run = svm->vcpu.run;
3141 
3142         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3143         svm_clear_vintr(svm);
3144         svm->vmcb->control.int_ctl &= ~V_IRQ_MASK;
3145         mark_dirty(svm->vmcb, VMCB_INTR);
3146         /*
3147          * If user space is waiting to inject interrupts, exit as soon as
3148          * possible
3149          */
3150         if (!irqchip_in_kernel(svm->vcpu.kvm) &&
3151             kvm_run->request_interrupt_window &&
3152             !kvm_cpu_has_interrupt(&svm->vcpu)) {
3153                 ++svm->vcpu.stat.irq_window_exits;
3154                 kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
3155                 return 0;
3156         }
3157 
3158         return 1;
3159 }
3160 
3161 static int pause_interception(struct vcpu_svm *svm)
3162 {
3163         kvm_vcpu_on_spin(&(svm->vcpu));
3164         return 1;
3165 }
3166 
3167 static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
3168         [SVM_EXIT_READ_CR0]                     = cr_interception,
3169         [SVM_EXIT_READ_CR3]                     = cr_interception,
3170         [SVM_EXIT_READ_CR4]                     = cr_interception,
3171         [SVM_EXIT_READ_CR8]                     = cr_interception,
3172         [SVM_EXIT_CR0_SEL_WRITE]                = emulate_on_interception,
3173         [SVM_EXIT_WRITE_CR0]                    = cr_interception,
3174         [SVM_EXIT_WRITE_CR3]                    = cr_interception,
3175         [SVM_EXIT_WRITE_CR4]                    = cr_interception,
3176         [SVM_EXIT_WRITE_CR8]                    = cr8_write_interception,
3177         [SVM_EXIT_READ_DR0]                     = dr_interception,
3178         [SVM_EXIT_READ_DR1]                     = dr_interception,
3179         [SVM_EXIT_READ_DR2]                     = dr_interception,
3180         [SVM_EXIT_READ_DR3]                     = dr_interception,
3181         [SVM_EXIT_READ_DR4]                     = dr_interception,
3182         [SVM_EXIT_READ_DR5]                     = dr_interception,
3183         [SVM_EXIT_READ_DR6]                     = dr_interception,
3184         [SVM_EXIT_READ_DR7]                     = dr_interception,
3185         [SVM_EXIT_WRITE_DR0]                    = dr_interception,
3186         [SVM_EXIT_WRITE_DR1]                    = dr_interception,
3187         [SVM_EXIT_WRITE_DR2]                    = dr_interception,
3188         [SVM_EXIT_WRITE_DR3]                    = dr_interception,
3189         [SVM_EXIT_WRITE_DR4]                    = dr_interception,
3190         [SVM_EXIT_WRITE_DR5]                    = dr_interception,
3191         [SVM_EXIT_WRITE_DR6]                    = dr_interception,
3192         [SVM_EXIT_WRITE_DR7]                    = dr_interception,
3193         [SVM_EXIT_EXCP_BASE + DB_VECTOR]        = db_interception,
3194         [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
3195         [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
3196         [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
3197         [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
3198         [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
3199         [SVM_EXIT_EXCP_BASE + AC_VECTOR]        = ac_interception,
3200         [SVM_EXIT_INTR]                         = intr_interception,
3201         [SVM_EXIT_NMI]                          = nmi_interception,
3202         [SVM_EXIT_SMI]                          = nop_on_interception,
3203         [SVM_EXIT_INIT]                         = nop_on_interception,
3204         [SVM_EXIT_VINTR]                        = interrupt_window_interception,
3205         [SVM_EXIT_CPUID]                        = cpuid_interception,
3206         [SVM_EXIT_IRET]                         = iret_interception,
3207         [SVM_EXIT_INVD]                         = emulate_on_interception,
3208         [SVM_EXIT_PAUSE]                        = pause_interception,
3209         [SVM_EXIT_HLT]                          = halt_interception,
3210         [SVM_EXIT_INVLPG]                       = invlpg_interception,
3211         [SVM_EXIT_INVLPGA]                      = invlpga_interception,
3212         [SVM_EXIT_IOIO]                         = io_interception,
3213         [SVM_EXIT_MSR]                          = msr_interception,
3214         [SVM_EXIT_TASK_SWITCH]                  = task_switch_interception,
3215         [SVM_EXIT_SHUTDOWN]                     = shutdown_interception,
3216         [SVM_EXIT_VMRUN]                        = vmrun_interception,
3217         [SVM_EXIT_VMMCALL]                      = vmmcall_interception,
3218         [SVM_EXIT_VMLOAD]                       = vmload_interception,
3219         [SVM_EXIT_VMSAVE]                       = vmsave_interception,
3220         [SVM_EXIT_STGI]                         = stgi_interception,
3221         [SVM_EXIT_CLGI]                         = clgi_interception,
3222         [SVM_EXIT_SKINIT]                       = skinit_interception,
3223         [SVM_EXIT_WBINVD]                       = emulate_on_interception,
3224         [SVM_EXIT_MONITOR]                      = invalid_op_interception,
3225         [SVM_EXIT_MWAIT]                        = invalid_op_interception,
3226         [SVM_EXIT_XSETBV]                       = xsetbv_interception,
3227         [SVM_EXIT_NPF]                          = pf_interception,
3228 };
3229 
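     /* Dump the VMCB control and state save areas after a failed VMRUN. */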
3230 static void dump_vmcb(struct kvm_vcpu *vcpu)
3231 {
3232         struct vcpu_svm *svm = to_svm(vcpu);
3233         struct vmcb_control_area *control = &svm->vmcb->control;
3234         struct vmcb_save_area *save = &svm->vmcb->save;
3235 
3236         pr_err("VMCB Control Area:\n");
3237         pr_err("%-20s%04x\n", "cr_read:", control->intercept_cr & 0xffff);
3238         pr_err("%-20s%04x\n", "cr_write:", control->intercept_cr >> 16);
3239         pr_err("%-20s%04x\n", "dr_read:", control->intercept_dr & 0xffff);
3240         pr_err("%-20s%04x\n", "dr_write:", control->intercept_dr >> 16);
3241         pr_err("%-20s%08x\n", "exceptions:", control->intercept_exceptions);
3242         pr_err("%-20s%016llx\n", "intercepts:", control->intercept);
3243         pr_err("%-20s%d\n", "pause filter count:", control->pause_filter_count);
3244         pr_err("%-20s%016llx\n", "iopm_base_pa:", control->iopm_base_pa);
3245         pr_err("%-20s%016llx\n", "msrpm_base_pa:", control->msrpm_base_pa);
3246         pr_err("%-20s%016llx\n", "tsc_offset:", control->tsc_offset);
3247         pr_err("%-20s%d\n", "asid:", control->asid);
3248         pr_err("%-20s%d\n", "tlb_ctl:", control->tlb_ctl);
3249         pr_err("%-20s%08x\n", "int_ctl:", control->int_ctl);
3250         pr_err("%-20s%08x\n", "int_vector:", control->int_vector);
3251         pr_err("%-20s%08x\n", "int_state:", control->int_state);
3252         pr_err("%-20s%08x\n", "exit_code:", control->exit_code);
3253         pr_err("%-20s%016llx\n", "exit_info1:", control->exit_info_1);
3254         pr_err("%-20s%016llx\n", "exit_info2:", control->exit_info_2);
3255         pr_err("%-20s%08x\n", "exit_int_info:", control->exit_int_info);
3256         pr_err("%-20s%08x\n", "exit_int_info_err:", control->exit_int_info_err);
3257         pr_err("%-20s%lld\n", "nested_ctl:", control->nested_ctl);
3258         pr_err("%-20s%016llx\n", "nested_cr3:", control->nested_cr3);
3259         pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
3260         pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
3261         pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
3262         pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
3263         pr_err("VMCB State Save Area:\n");
3264         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3265                "es:",
3266                save->es.selector, save->es.attrib,
3267                save->es.limit, save->es.base);
3268         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3269                "cs:",
3270                save->cs.selector, save->cs.attrib,
3271                save->cs.limit, save->cs.base);
3272         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3273                "ss:",
3274                save->ss.selector, save->ss.attrib,
3275                save->ss.limit, save->ss.base);
3276         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3277                "ds:",
3278                save->ds.selector, save->ds.attrib,
3279                save->ds.limit, save->ds.base);
3280         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3281                "fs:",
3282                save->fs.selector, save->fs.attrib,
3283                save->fs.limit, save->fs.base);
3284         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3285                "gs:",
3286                save->gs.selector, save->gs.attrib,
3287                save->gs.limit, save->gs.base);
3288         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3289                "gdtr:",
3290                save->gdtr.selector, save->gdtr.attrib,
3291                save->gdtr.limit, save->gdtr.base);
3292         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3293                "ldtr:",
3294                save->ldtr.selector, save->ldtr.attrib,
3295                save->ldtr.limit, save->ldtr.base);
3296         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3297                "idtr:",
3298                save->idtr.selector, save->idtr.attrib,
3299                save->idtr.limit, save->idtr.base);
3300         pr_err("%-5s s: %04x a: %04x l: %08x b: %016llx\n",
3301                "tr:",
3302                save->tr.selector, save->tr.attrib,
3303                save->tr.limit, save->tr.base);
3304         pr_err("cpl:            %d                efer:         %016llx\n",
3305                 save->cpl, save->efer);
3306         pr_err("%-15s %016llx %-13s %016llx\n",
3307                "cr0:", save->cr0, "cr2:", save->cr2);
3308         pr_err("%-15s %016llx %-13s %016llx\n",
3309                "cr3:", save->cr3, "cr4:", save->cr4);
3310         pr_err("%-15s %016llx %-13s %016llx\n",
3311                "dr6:", save->dr6, "dr7:", save->dr7);
3312         pr_err("%-15s %016llx %-13s %016llx\n",
3313                "rip:", save->rip, "rflags:", save->rflags);
3314         pr_err("%-15s %016llx %-13s %016llx\n",
3315                "rsp:", save->rsp, "rax:", save->rax);
3316         pr_err("%-15s %016llx %-13s %016llx\n",
3317                "star:", save->star, "lstar:", save->lstar);
3318         pr_err("%-15s %016llx %-13s %016llx\n",
3319                "cstar:", save->cstar, "sfmask:", save->sfmask);
3320         pr_err("%-15s %016llx %-13s %016llx\n",
3321                "kernel_gs_base:", save->kernel_gs_base,
3322                "sysenter_cs:", save->sysenter_cs);
3323         pr_err("%-15s %016llx %-13s %016llx\n",
3324                "sysenter_esp:", save->sysenter_esp,
3325                "sysenter_eip:", save->sysenter_eip);
3326         pr_err("%-15s %016llx %-13s %016llx\n",
3327                "gpat:", save->g_pat, "dbgctl:", save->dbgctl);
3328         pr_err("%-15s %016llx %-13s %016llx\n",
3329                "br_from:", save->br_from, "br_to:", save->br_to);
3330         pr_err("%-15s %016llx %-13s %016llx\n",
3331                "excp_from:", save->last_excp_from,
3332                "excp_to:", save->last_excp_to);
3333 }
3334 
3335 static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2)
3336 {
3337         struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
3338 
3339         *info1 = control->exit_info_1;
3340         *info2 = control->exit_info_2;
3341 }
3342 
3343 static int handle_exit(struct kvm_vcpu *vcpu)
3344 {
3345         struct vcpu_svm *svm = to_svm(vcpu);
3346         struct kvm_run *kvm_run = vcpu->run;
3347         u32 exit_code = svm->vmcb->control.exit_code;
3348 
3349         if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE))
3350                 vcpu->arch.cr0 = svm->vmcb->save.cr0;
3351         if (npt_enabled)
3352                 vcpu->arch.cr3 = svm->vmcb->save.cr3;
3353 
3354         if (unlikely(svm->nested.exit_required)) {
3355                 nested_svm_vmexit(svm);
3356                 svm->nested.exit_required = false;
3357 
3358                 return 1;
3359         }
3360 
3361         if (is_guest_mode(vcpu)) {
3362                 int vmexit;
3363 
3364                 trace_kvm_nested_vmexit(svm->vmcb->save.rip, exit_code,
3365                                         svm->vmcb->control.exit_info_1,
3366                                         svm->vmcb->control.exit_info_2,
3367                                         svm->vmcb->control.exit_int_info,
3368                                         svm->vmcb->control.exit_int_info_err,
3369                                         KVM_ISA_SVM);
3370 
3371                 vmexit = nested_svm_exit_special(svm);
3372 
3373                 if (vmexit == NESTED_EXIT_CONTINUE)
3374                         vmexit = nested_svm_exit_handled(svm);
3375 
3376                 if (vmexit == NESTED_EXIT_DONE)
3377                         return 1;
3378         }
3379 
3380         svm_complete_interrupts(svm);
3381 
3382         if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) {
3383                 kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
3384                 kvm_run->fail_entry.hardware_entry_failure_reason
3385                         = svm->vmcb->control.exit_code;
3386                 pr_err("KVM: FAILED VMRUN WITH VMCB:\n");
3387                 dump_vmcb(vcpu);
3388                 return 0;
3389         }
3390 
3391         if (is_external_interrupt(svm->vmcb->control.exit_int_info) &&
3392             exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR &&
3393             exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH &&
3394             exit_code != SVM_EXIT_INTR && exit_code != SVM_EXIT_NMI)
3395                 printk(KERN_ERR "%s: unexpected exit_int_info 0x%x "
3396                        "exit_code 0x%x\n",
3397                        __func__, svm->vmcb->control.exit_int_info,
3398                        exit_code);
3399 
3400         if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
3401             || !svm_exit_handlers[exit_code]) {
3402                 WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
3403                 kvm_queue_exception(vcpu, UD_VECTOR);
3404                 return 1;
3405         }
3406 
3407         return svm_exit_handlers[exit_code](svm);
3408 }
3409 
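     /*
      * Reset the per-cpu TSS descriptor to "available" so that
      * load_TR_desc() can reload the host TR after a #VMEXIT.
      */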
3410 static void reload_tss(struct kvm_vcpu *vcpu)
3411 {
3412         int cpu = raw_smp_processor_id();
3413 
3414         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3415         sd->tss_desc->type = 9; /* available 32/64-bit TSS */
3416         load_TR_desc();
3417 }
3418 
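     /*
      * Allocate a fresh ASID if this vcpu's generation no longer matches
      * the per-cpu generation (e.g. after migrating to another pcpu).
      */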
3419 static void pre_svm_run(struct vcpu_svm *svm)
3420 {
3421         int cpu = raw_smp_processor_id();
3422 
3423         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
3424 
3425         /* FIXME: handle wraparound of asid_generation */
3426         if (svm->asid_generation != sd->asid_generation)
3427                 new_asid(svm, sd);
3428 }
3429 
3430 static void svm_inject_nmi(struct kvm_vcpu *vcpu)
3431 {
3432         struct vcpu_svm *svm = to_svm(vcpu);
3433 
3434         svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
3435         vcpu->arch.hflags |= HF_NMI_MASK;
3436         set_intercept(svm, INTERCEPT_IRET);
3437         ++vcpu->stat.nmi_injections;
3438 }
3439 
3440 static inline void svm_inject_irq(struct vcpu_svm *svm, int irq)
3441 {
3442         struct vmcb_control_area *control;
3443 
3444         control = &svm->vmcb->control;
3445         control->int_vector = irq;
3446         control->int_ctl &= ~V_INTR_PRIO_MASK;
3447         control->int_ctl |= V_IRQ_MASK |
3448                 ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT);
3449         mark_dirty(svm->vmcb, VMCB_INTR);
3450 }
3451 
3452 static void svm_set_irq(struct kvm_vcpu *vcpu)
3453 {
3454         struct vcpu_svm *svm = to_svm(vcpu);
3455 
3456         BUG_ON(!(gif_set(svm)));
3457 
3458         trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
3459         ++vcpu->stat.irq_injections;
3460 
3461         svm->vmcb->control.event_inj = vcpu->arch.interrupt.nr |
3462                 SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR;
3463 }
3464 
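     /*
      * Intercept CR8 writes only while a pending interrupt (irr) is blocked
      * by the current TPR, so the guest's next TPR update causes an exit.
      */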
3465 static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
3466 {
3467         struct vcpu_svm *svm = to_svm(vcpu);
3468 
3469         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3470                 return;
3471 
3472         clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3473 
3474         if (irr == -1)
3475                 return;
3476 
3477         if (tpr >= irr)
3478                 set_cr_intercept(svm, INTERCEPT_CR8_WRITE);
3479 }
3480 
3481 static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
3482 {
3483         struct vcpu_svm *svm = to_svm(vcpu);
3484         struct vmcb *vmcb = svm->vmcb;
3485         int ret;
3486         ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) &&
3487               !(svm->vcpu.arch.hflags & HF_NMI_MASK);
3488         ret = ret && gif_set(svm) && nested_svm_nmi(svm);
3489 
3490         return ret;
3491 }
3492 
3493 static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu)
3494 {
3495         struct vcpu_svm *svm = to_svm(vcpu);
3496 
3497         return !!(svm->vcpu.arch.hflags & HF_NMI_MASK);
3498 }
3499 
3500 static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
3501 {
3502         struct vcpu_svm *svm = to_svm(vcpu);
3503 
3504         if (masked) {
3505                 svm->vcpu.arch.hflags |= HF_NMI_MASK;
3506                 set_intercept(svm, INTERCEPT_IRET);
3507         } else {
3508                 svm->vcpu.arch.hflags &= ~HF_NMI_MASK;
3509                 clr_intercept(svm, INTERCEPT_IRET);
3510         }
3511 }
3512 
3513 static int svm_interrupt_allowed(struct kvm_vcpu *vcpu)
3514 {
3515         struct vcpu_svm *svm = to_svm(vcpu);
3516         struct vmcb *vmcb = svm->vmcb;
3517         int ret;
3518 
3519         if (!gif_set(svm) ||
3520              (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK))
3521                 return 0;
3522 
3523         ret = !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF);
3524 
3525         if (is_guest_mode(vcpu))
3526                 return ret && !(svm->vcpu.arch.hflags & HF_VINTR_MASK);
3527 
3528         return ret;
3529 }
3530 
3531 static void enable_irq_window(struct kvm_vcpu *vcpu)
3532 {
3533         struct vcpu_svm *svm = to_svm(vcpu);
3534 
3535         /*
3536          * In case GIF=0 we can't rely on the CPU to tell us when GIF becomes
3537          * 1, because that's a separate STGI/VMRUN intercept.  The next time we
3538          * get that intercept, this function will be called again though and
3539          * we'll get the vintr intercept.
3540          */
3541         if (gif_set(svm) && nested_svm_intr(svm)) {
3542                 svm_set_vintr(svm);
3543                 svm_inject_irq(svm, 0x0);
3544         }
3545 }
3546 
3547 static void enable_nmi_window(struct kvm_vcpu *vcpu)
3548 {
3549         struct vcpu_svm *svm = to_svm(vcpu);
3550 
3551         if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK))
3552             == HF_NMI_MASK)
3553                 return; /* IRET will cause a vm exit */
3554 
3555         /*
3556          * Something prevents NMI from being injected. Single-step over the
3557          * possible problem (IRET, exception injection or interrupt shadow).
3558          */
3559         svm->nmi_singlestep = true;
3560         svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
3561 }
3562 
3563 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
3564 {
3565         return 0;
3566 }
3567 
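     /*
      * Flush this guest's TLB entries: use FLUSHBYASID when the CPU
      * supports it, otherwise force a new ASID on the next VMRUN.
      */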
3568 static void svm_flush_tlb(struct kvm_vcpu *vcpu)
3569 {
3570         struct vcpu_svm *svm = to_svm(vcpu);
3571 
3572         if (static_cpu_has(X86_FEATURE_FLUSHBYASID))
3573                 svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
3574         else
3575                 svm->asid_generation--;
3576 }
3577 
3578 static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
3579 {
3580 }
3581 
3582 static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu)
3583 {
3584         struct vcpu_svm *svm = to_svm(vcpu);
3585 
3586         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3587                 return;
3588 
3589         if (!is_cr_intercept(svm, INTERCEPT_CR8_WRITE)) {
3590                 int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK;
3591                 kvm_set_cr8(vcpu, cr8);
3592         }
3593 }
3594 
3595 static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu)
3596 {
3597         struct vcpu_svm *svm = to_svm(vcpu);
3598         u64 cr8;
3599 
3600         if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
3601                 return;
3602 
3603         cr8 = kvm_get_cr8(vcpu);
3604         svm->vmcb->control.int_ctl &= ~V_TPR_MASK;
3605         svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK;
3606 }
3607 
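     /*
      * Requeue an event that was cut short by the #VMEXIT (EXITINTINFO)
      * so it is reinjected on the next entry, and lift the NMI mask once
      * the guest has made progress past the intercepted IRET.
      */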
3608 static void svm_complete_interrupts(struct vcpu_svm *svm)
3609 {
3610         u8 vector;
3611         int type;
3612         u32 exitintinfo = svm->vmcb->control.exit_int_info;
3613         unsigned int3_injected = svm->int3_injected;
3614 
3615         svm->int3_injected = 0;
3616 
3617         /*
3618          * If we've made progress since setting HF_IRET_MASK, we've
3619          * executed an IRET and can allow NMI injection.
3620          */
3621         if ((svm->vcpu.arch.hflags & HF_IRET_MASK)
3622             && kvm_rip_read(&svm->vcpu) != svm->nmi_iret_rip) {
3623                 svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK);
3624                 kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3625         }
3626 
3627         svm->vcpu.arch.nmi_injected = false;
3628         kvm_clear_exception_queue(&svm->vcpu);
3629         kvm_clear_interrupt_queue(&svm->vcpu);
3630 
3631         if (!(exitintinfo & SVM_EXITINTINFO_VALID))
3632                 return;
3633 
3634         kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
3635 
3636         vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK;
3637         type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK;
3638 
3639         switch (type) {
3640         case SVM_EXITINTINFO_TYPE_NMI:
3641                 svm->vcpu.arch.nmi_injected = true;
3642                 break;
3643         case SVM_EXITINTINFO_TYPE_EXEPT:
3644                 /*
3645                  * In case of software exceptions, do not reinject the vector,
3646                  * but re-execute the instruction instead. Rewind RIP first
3647                  * if we emulated INT3 before.
3648                  */
3649                 if (kvm_exception_is_soft(vector)) {
3650                         if (vector == BP_VECTOR && int3_injected &&
3651                             kvm_is_linear_rip(&svm->vcpu, svm->int3_rip))
3652                                 kvm_rip_write(&svm->vcpu,
3653                                               kvm_rip_read(&svm->vcpu) -
3654                                               int3_injected);
3655                         break;
3656                 }
3657                 if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) {
3658                         u32 err = svm->vmcb->control.exit_int_info_err;
3659                         kvm_requeue_exception_e(&svm->vcpu, vector, err);
3660 
3661                 } else
3662                         kvm_requeue_exception(&svm->vcpu, vector);
3663                 break;
3664         case SVM_EXITINTINFO_TYPE_INTR:
3665                 kvm_queue_interrupt(&svm->vcpu, vector, false);
3666                 break;
3667         default:
3668                 break;
3669         }
3670 }
3671 
3672 static void svm_cancel_injection(struct kvm_vcpu *vcpu)
3673 {
3674         struct vcpu_svm *svm = to_svm(vcpu);
3675         struct vmcb_control_area *control = &svm->vmcb->control;
3676 
3677         control->exit_int_info = control->event_inj;
3678         control->exit_int_info_err = control->event_inj_err;
3679         control->event_inj = 0;
3680         svm_complete_interrupts(svm);
3681 }
3682 
3683 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
3684 {
3685         struct vcpu_svm *svm = to_svm(vcpu);
3686 
3687         svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
3688         svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
3689         svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
3690 
3691         /*
3692          * A vmexit emulation is required before the vcpu can be executed
3693          * again.
3694          */
3695         if (unlikely(svm->nested.exit_required))
3696                 return;
3697 
3698         pre_svm_run(svm);
3699 
3700         sync_lapic_to_cr8(vcpu);
3701 
3702         svm->vmcb->save.cr2 = vcpu->arch.cr2;
3703 
3704         clgi();
3705 
3706         local_irq_enable();
3707 
3708         asm volatile (
3709                 "push %%" _ASM_BP "; \n\t"
3710                 "mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
3711                 "mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
3712                 "mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
3713                 "mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
3714                 "mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
3715                 "mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
3716 #ifdef CONFIG_X86_64
3717                 "mov %c[r8](%[svm]),  %%r8  \n\t"
3718                 "mov %c[r9](%[svm]),  %%r9  \n\t"
3719                 "mov %c[r10](%[svm]), %%r10 \n\t"
3720                 "mov %c[r11](%[svm]), %%r11 \n\t"
3721                 "mov %c[r12](%[svm]), %%r12 \n\t"
3722                 "mov %c[r13](%[svm]), %%r13 \n\t"
3723                 "mov %c[r14](%[svm]), %%r14 \n\t"
3724                 "mov %c[r15](%[svm]), %%r15 \n\t"
3725 #endif
3726 
3727                 /* Enter guest mode */
3728                 "push %%" _ASM_AX " \n\t"
3729                 "mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
3730                 __ex(SVM_VMLOAD) "\n\t"
3731                 __ex(SVM_VMRUN) "\n\t"
3732                 __ex(SVM_VMSAVE) "\n\t"
3733                 "pop %%" _ASM_AX " \n\t"
3734 
3735                 /* Save guest registers, load host registers */
3736                 "mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
3737                 "mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
3738                 "mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
3739                 "mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
3740                 "mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
3741                 "mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
3742 #ifdef CONFIG_X86_64
3743                 "mov %%r8,  %c[r8](%[svm]) \n\t"
3744                 "mov %%r9,  %c[r9](%[svm]) \n\t"
3745                 "mov %%r10, %c[r10](%[svm]) \n\t"
3746                 "mov %%r11, %c[r11](%[svm]) \n\t"
3747                 "mov %%r12, %c[r12](%[svm]) \n\t"
3748                 "mov %%r13, %c[r13](%[svm]) \n\t"
3749                 "mov %%r14, %c[r14](%[svm]) \n\t"
3750                 "mov %%r15, %c[r15](%[svm]) \n\t"
3751 #endif
3752                 /*
3753                  * Clear host registers marked as clobbered to prevent
3754                  * speculative use.
3755                  */
3756                 "xor %%" _ASM_BX ", %%" _ASM_BX " \n\t"
3757                 "xor %%" _ASM_CX ", %%" _ASM_CX " \n\t"
3758                 "xor %%" _ASM_DX ", %%" _ASM_DX " \n\t"
3759                 "xor %%" _ASM_SI ", %%" _ASM_SI " \n\t"
3760                 "xor %%" _ASM_DI ", %%" _ASM_DI " \n\t"
3761 #ifdef CONFIG_X86_64
3762                 "xor %%r8, %%r8 \n\t"
3763                 "xor %%r9, %%r9 \n\t"
3764                 "xor %%r10, %%r10 \n\t"
3765                 "xor %%r11, %%r11 \n\t"
3766                 "xor %%r12, %%r12 \n\t"
3767                 "xor %%r13, %%r13 \n\t"
3768                 "xor %%r14, %%r14 \n\t"
3769                 "xor %%r15, %%r15 \n\t"
3770 #endif
3771                 "pop %%" _ASM_BP
3772                 :
3773                 : [svm]"a"(svm),
3774                   [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
3775                   [rbx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBX])),
3776                   [rcx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RCX])),
3777                   [rdx]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDX])),
3778                   [rsi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RSI])),
3779                   [rdi]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RDI])),
3780                   [rbp]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_RBP]))
3781 #ifdef CONFIG_X86_64
3782                   , [r8]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R8])),
3783                   [r9]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R9])),
3784                   [r10]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R10])),
3785                   [r11]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R11])),
3786                   [r12]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R12])),
3787                   [r13]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R13])),
3788                   [r14]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R14])),
3789                   [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
3790 #endif
3791                 : "cc", "memory"
3792 #ifdef CONFIG_X86_64
3793                 , "rbx", "rcx", "rdx", "rsi", "rdi"
3794                 , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
3795 #else
3796                 , "ebx", "ecx", "edx", "esi", "edi"
3797 #endif
3798                 );
3799 
3800         /* Eliminate branch target predictions from guest mode */
3801         vmexit_fill_RSB();
3802 
3803 #ifdef CONFIG_X86_64
3804         wrmsrl(MSR_GS_BASE, svm->host.gs_base);
3805 #else
3806         loadsegment(fs, svm->host.fs);
3807 #ifndef CONFIG_X86_32_LAZY_GS
3808         loadsegment(gs, svm->host.gs);
3809 #endif
3810 #endif
3811 
3812         reload_tss(vcpu);
3813 
3814         local_irq_disable();
3815 
3816         vcpu->arch.cr2 = svm->vmcb->save.cr2;
3817         vcpu->arch.regs[VCPU_REGS_RAX] = svm->vmcb->save.rax;
3818         vcpu->arch.regs[VCPU_REGS_RSP] = svm->vmcb->save.rsp;
3819         vcpu->arch.regs[VCPU_REGS_RIP] = svm->vmcb->save.rip;
3820 
3821         trace_kvm_exit(svm->vmcb->control.exit_code, vcpu, KVM_ISA_SVM);
3822 
3823         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3824                 kvm_before_handle_nmi(&svm->vcpu);
3825 
3826         stgi();
3827 
3828         /* Any pending NMI will happen here */
3829 
3830         if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI))
3831                 kvm_after_handle_nmi(&svm->vcpu);
3832 
3833         sync_cr8_to_lapic(vcpu);
3834 
3835         svm->next_rip = 0;
3836 
3837         svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
3838 
3839         /* if exit due to PF check for async PF */
3840         if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
3841                 svm->apf_reason = kvm_read_and_reset_pf_reason();
3842 
3843         if (npt_enabled) {
3844                 vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
3845                 vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
3846         }
3847 
3848         /*
3849          * We need to handle MC intercepts here before the vcpu has a chance to
3850          * change the physical cpu
3851          */
3852         if (unlikely(svm->vmcb->control.exit_code ==
3853                      SVM_EXIT_EXCP_BASE + MC_VECTOR))
3854                 svm_handle_mce(svm);
3855 
3856         mark_all_clean(svm->vmcb);
3857 }
3858 
3859 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3860 {
3861         struct vcpu_svm *svm = to_svm(vcpu);
3862 
3863         svm->vmcb->save.cr3 = root;
3864         mark_dirty(svm->vmcb, VMCB_CR);
3865         svm_flush_tlb(vcpu);
3866 }
3867 
3868 static void set_tdp_cr3(struct kvm_vcpu *vcpu, unsigned long root)
3869 {
3870         struct vcpu_svm *svm = to_svm(vcpu);
3871 
3872         svm->vmcb->control.nested_cr3 = root;
3873         mark_dirty(svm->vmcb, VMCB_NPT);
3874 
3875         /* Also sync guest cr3 here in case we live migrate */
3876         svm->vmcb->save.cr3 = kvm_read_cr3(vcpu);
3877         mark_dirty(svm->vmcb, VMCB_CR);
3878 
3879         svm_flush_tlb(vcpu);
3880 }
3881 
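     /* The BIOS can disable SVM via the SVMDIS bit in MSR_VM_CR. */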
3882 static int is_disabled(void)
3883 {
3884         u64 vm_cr;
3885 
3886         rdmsrl(MSR_VM_CR, vm_cr);
3887         if (vm_cr & (1 << SVM_VM_CR_SVM_DISABLE))
3888                 return 1;
3889 
3890         return 0;
3891 }
3892 
3893 static void
3894 svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
3895 {
3896         /*
3897          * Patch in the VMMCALL instruction:
3898          */
3899         hypercall[0] = 0x0f;
3900         hypercall[1] = 0x01;
3901         hypercall[2] = 0xd9;
3902 }
3903 
3904 static void svm_check_processor_compat(void *rtn)
3905 {
3906         *(int *)rtn = 0;
3907 }
3908 
3909 static bool svm_cpu_has_accelerated_tpr(void)
3910 {
3911         return false;
3912 }
3913 
3914 static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
3915 {
3916         return 0;
3917 }
3918 
3919 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
3920 {
3921 }
3922 
3923 static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
3924 {
3925         switch (func) {
3926         case 0x80000001:
3927                 if (nested)
3928                         entry->ecx |= (1 << 2); /* Set SVM bit */
3929                 break;
3930         case 0x8000000A:
3931                 entry->eax = 1; /* SVM revision 1 */
3932                 entry->ebx = 8; /* Let's support 8 ASIDs in case we add proper
3933                                    ASID emulation to nested SVM */
3934                 entry->ecx = 0; /* Reserved */
3935                 entry->edx = 0; /* By default do not support any
3936                                    additional features */
3937 
3938                 /* Support next_rip if host supports it */
3939                 if (boot_cpu_has(X86_FEATURE_NRIPS))
3940                         entry->edx |= SVM_FEATURE_NRIP;
3941 
3942                 /* Support NPT for the guest if enabled */
3943                 if (npt_enabled)
3944                         entry->edx |= SVM_FEATURE_NPT;
3945 
3946                 break;
3947         }
3948 }
3949 
3950 static int svm_get_lpage_level(void)
3951 {
3952         return PT_PDPE_LEVEL;
3953 }
3954 
3955 static bool svm_rdtscp_supported(void)
3956 {
3957         return false;
3958 }
3959 
3960 static bool svm_has_wbinvd_exit(void)
3961 {
3962         return true;
3963 }
3964 
3965 static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
3966 {
3967         struct vcpu_svm *svm = to_svm(vcpu);
3968 
3969         set_exception_intercept(svm, NM_VECTOR);
3970         update_cr0_intercept(svm);
3971 }
3972 
3973 #define PRE_EX(exit)  { .exit_code = (exit), \
3974                         .stage = X86_ICPT_PRE_EXCEPT, }
3975 #define POST_EX(exit) { .exit_code = (exit), \
3976                         .stage = X86_ICPT_POST_EXCEPT, }
3977 #define POST_MEM(exit) { .exit_code = (exit), \
3978                         .stage = X86_ICPT_POST_MEMACCESS, }
3979 
3980 static struct __x86_intercept {
3981         u32 exit_code;
3982         enum x86_intercept_stage stage;
3983 } x86_intercept_map[] = {
3984         [x86_intercept_cr_read]         = POST_EX(SVM_EXIT_READ_CR0),
3985         [x86_intercept_cr_write]        = POST_EX(SVM_EXIT_WRITE_CR0),
3986         [x86_intercept_clts]            = POST_EX(SVM_EXIT_WRITE_CR0),
3987         [x86_intercept_lmsw]            = POST_EX(SVM_EXIT_WRITE_CR0),
3988         [x86_intercept_smsw]            = POST_EX(SVM_EXIT_READ_CR0),
3989         [x86_intercept_dr_read]         = POST_EX(SVM_EXIT_READ_DR0),
3990         [x86_intercept_dr_write]        = POST_EX(SVM_EXIT_WRITE_DR0),
3991         [x86_intercept_sldt]            = POST_EX(SVM_EXIT_LDTR_READ),
3992         [x86_intercept_str]             = POST_EX(SVM_EXIT_TR_READ),
3993         [x86_intercept_lldt]            = POST_EX(SVM_EXIT_LDTR_WRITE),
3994         [x86_intercept_ltr]             = POST_EX(SVM_EXIT_TR_WRITE),
3995         [x86_intercept_sgdt]            = POST_EX(SVM_EXIT_GDTR_READ),
3996         [x86_intercept_sidt]            = POST_EX(SVM_EXIT_IDTR_READ),
3997         [x86_intercept_lgdt]            = POST_EX(SVM_EXIT_GDTR_WRITE),
3998         [x86_intercept_lidt]            = POST_EX(SVM_EXIT_IDTR_WRITE),
3999         [x86_intercept_vmrun]           = POST_EX(SVM_EXIT_VMRUN),
4000         [x86_intercept_vmmcall]         = POST_EX(SVM_EXIT_VMMCALL),
4001         [x86_intercept_vmload]          = POST_EX(SVM_EXIT_VMLOAD),
4002         [x86_intercept_vmsave]          = POST_EX(SVM_EXIT_VMSAVE),
4003         [x86_intercept_stgi]            = POST_EX(SVM_EXIT_STGI),
4004         [x86_intercept_clgi]            = POST_EX(SVM_EXIT_CLGI),
4005         [x86_intercept_skinit]          = POST_EX(SVM_EXIT_SKINIT),
4006         [x86_intercept_invlpga]         = POST_EX(SVM_EXIT_INVLPGA),
4007         [x86_intercept_rdtscp]          = POST_EX(SVM_EXIT_RDTSCP),
4008         [x86_intercept_monitor]         = POST_MEM(SVM_EXIT_MONITOR),
4009         [x86_intercept_mwait]           = POST_EX(SVM_EXIT_MWAIT),
4010         [x86_intercept_invlpg]          = POST_EX(SVM_EXIT_INVLPG),
4011         [x86_intercept_invd]            = POST_EX(SVM_EXIT_INVD),
4012         [x86_intercept_wbinvd]          = POST_EX(SVM_EXIT_WBINVD),
4013         [x86_intercept_wrmsr]           = POST_EX(SVM_EXIT_MSR),
4014         [x86_intercept_rdtsc]           = POST_EX(SVM_EXIT_RDTSC),
4015         [x86_intercept_rdmsr]           = POST_EX(SVM_EXIT_MSR),
4016         [x86_intercept_rdpmc]           = POST_EX(SVM_EXIT_RDPMC),
4017         [x86_intercept_cpuid]           = PRE_EX(SVM_EXIT_CPUID),
4018         [x86_intercept_rsm]             = PRE_EX(SVM_EXIT_RSM),
4019         [x86_intercept_pause]           = PRE_EX(SVM_EXIT_PAUSE),
4020         [x86_intercept_pushf]           = PRE_EX(SVM_EXIT_PUSHF),
4021         [x86_intercept_popf]            = PRE_EX(SVM_EXIT_POPF),
4022         [x86_intercept_intn]            = PRE_EX(SVM_EXIT_SWINT),
4023         [x86_intercept_iret]            = PRE_EX(SVM_EXIT_IRET),
4024         [x86_intercept_icebp]           = PRE_EX(SVM_EXIT_ICEBP),
4025         [x86_intercept_hlt]             = POST_EX(SVM_EXIT_HLT),
4026         [x86_intercept_in]              = POST_EX(SVM_EXIT_IOIO),
4027         [x86_intercept_ins]             = POST_EX(SVM_EXIT_IOIO),
4028         [x86_intercept_out]             = POST_EX(SVM_EXIT_IOIO),
4029         [x86_intercept_outs]            = POST_EX(SVM_EXIT_IOIO),
4030 };
4031 
4032 #undef PRE_EX
4033 #undef POST_EX
4034 #undef POST_MEM
4035 
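     /*
      * Emulator intercept check for nested guests: build the exit code and
      * exit_info the L1 hypervisor would see for this instruction and let
      * nested_svm_exit_handled() decide whether to forward it as a #VMEXIT.
      */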
4036 static int svm_check_intercept(struct kvm_vcpu *vcpu,
4037                                struct x86_instruction_info *info,
4038                                enum x86_intercept_stage stage)
4039 {
4040         struct vcpu_svm *svm = to_svm(vcpu);
4041         int vmexit, ret = X86EMUL_CONTINUE;
4042         struct __x86_intercept icpt_info;
4043         struct vmcb *vmcb = svm->vmcb;
4044 
4045         if (info->intercept >= ARRAY_SIZE(x86_intercept_map))
4046                 goto out;
4047 
4048         icpt_info = x86_intercept_map[info->intercept];
4049 
4050         if (stage != icpt_info.stage)
4051                 goto out;
4052 
4053         switch (icpt_info.exit_code) {
4054         case SVM_EXIT_READ_CR0:
4055                 if (info->intercept == x86_intercept_cr_read)
4056                         icpt_info.exit_code += info->modrm_reg;
4057                 break;
4058         case SVM_EXIT_WRITE_CR0: {
4059                 unsigned long cr0, val;
4060                 u64 intercept;
4061 
4062                 if (info->intercept == x86_intercept_cr_write)
4063                         icpt_info.exit_code += info->modrm_reg;
4064 
4065                 if (icpt_info.exit_code != SVM_EXIT_WRITE_CR0)
4066                         break;
4067 
4068                 intercept = svm->nested.intercept;
4069 
4070                 if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))
4071                         break;
4072 
4073                 cr0 = vcpu->arch.cr0 & ~SVM_CR0_SELECTIVE_MASK;
4074                 val = info->src_val  & ~SVM_CR0_SELECTIVE_MASK;
4075 
4076                 if (info->intercept == x86_intercept_lmsw) {
4077                         cr0 &= 0xfUL;
4078                         val &= 0xfUL;
4079                         /* lmsw can't clear PE - catch this here */
4080                         if (cr0 & X86_CR0_PE)
4081                                 val |= X86_CR0_PE;
4082                 }
4083 
4084                 if (cr0 ^ val)
4085                         icpt_info.exit_code = SVM_EXIT_CR0_SEL_WRITE;
4086 
4087                 break;
4088         }
4089         case SVM_EXIT_READ_DR0:
4090         case SVM_EXIT_WRITE_DR0:
4091                 icpt_info.exit_code += info->modrm_reg;
4092                 break;
4093         case SVM_EXIT_MSR:
4094                 if (info->intercept == x86_intercept_wrmsr)
4095                         vmcb->control.exit_info_1 = 1;
4096                 else
4097                         vmcb->control.exit_info_1 = 0;
4098                 break;
4099         case SVM_EXIT_PAUSE:
4100                 /*
4101                  * We only get this intercept for NOP, but PAUSE is
4102                  * REP NOP, so check for the REP prefix here.
4103                  */
4104                 if (info->rep_prefix != REPE_PREFIX)
4105                         goto out;
4106                 break;
4107         case SVM_EXIT_IOIO: {
4108                 u64 exit_info;
4109                 u32 bytes;
4110 
4111                 exit_info = (vcpu->arch.regs[VCPU_REGS_RDX] & 0xffff) << 16;
4112 
4113                 if (info->intercept == x86_intercept_in ||
4114                     info->intercept == x86_intercept_ins) {
4115                         exit_info |= SVM_IOIO_TYPE_MASK;
4116                         bytes = info->src_bytes;
4117                 } else {
4118                         bytes = info->dst_bytes;
4119                 }
4120 
4121                 if (info->intercept == x86_intercept_outs ||
4122                     info->intercept == x86_intercept_ins)
4123                         exit_info |= SVM_IOIO_STR_MASK;
4124 
4125                 if (info->rep_prefix)
4126                         exit_info |= SVM_IOIO_REP_MASK;
4127 
4128                 bytes = min(bytes, 4u);
4129 
4130                 exit_info |= bytes << SVM_IOIO_SIZE_SHIFT;
4131 
4132                 exit_info |= (u32)info->ad_bytes << (SVM_IOIO_ASIZE_SHIFT - 1);
4133 
4134                 vmcb->control.exit_info_1 = exit_info;
4135                 vmcb->control.exit_info_2 = info->next_rip;
4136 
4137                 break;
4138         }
4139         default:
4140                 break;
4141         }
4142 
4143         vmcb->control.next_rip  = info->next_rip;
4144         vmcb->control.exit_code = icpt_info.exit_code;
4145         vmexit = nested_svm_exit_handled(svm);
4146 
4147         ret = (vmexit == NESTED_EXIT_DONE) ? X86EMUL_INTERCEPTED
4148                                            : X86EMUL_CONTINUE;
4149 
4150 out:
4151         return ret;
4152 }
4153 
4154 static struct kvm_x86_ops svm_x86_ops = {
4155         .cpu_has_kvm_support = has_svm,
4156         .disabled_by_bios = is_disabled,
4157         .hardware_setup = svm_hardware_setup,
4158         .hardware_unsetup = svm_hardware_unsetup,
4159         .check_processor_compatibility = svm_check_processor_compat,
4160         .hardware_enable = svm_hardware_enable,
4161         .hardware_disable = svm_hardware_disable,
4162         .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
4163 
4164         .vcpu_create = svm_create_vcpu,
4165         .vcpu_free = svm_free_vcpu,
4166         .vcpu_reset = svm_vcpu_reset,
4167 
4168         .prepare_guest_switch = svm_prepare_guest_switch,
4169         .vcpu_load = svm_vcpu_load,
4170         .vcpu_put = svm_vcpu_put,
4171 
4172         .set_guest_debug = svm_guest_debug,
4173         .get_msr = svm_get_msr,
4174         .set_msr = svm_set_msr,
4175         .get_segment_base = svm_get_segment_base,
4176         .get_segment = svm_get_segment,
4177         .set_segment = svm_set_segment,
4178         .get_cpl = svm_get_cpl,
4179         .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
4180         .decache_cr0_guest_bits = svm_decache_cr0_guest_bits,
4181         .decache_cr3 = svm_decache_cr3,
4182         .decache_cr4_guest_bits = svm_decache_cr4_guest_bits,
4183         .set_cr0 = svm_set_cr0,
4184         .set_cr3 = svm_set_cr3,
4185         .set_cr4 = svm_set_cr4,
4186         .set_efer = svm_set_efer,
4187         .get_idt = svm_get_idt,
4188         .set_idt = svm_set_idt,
4189         .get_gdt = svm_get_gdt,
4190         .set_gdt = svm_set_gdt,
4191         .set_dr7 = svm_set_dr7,
4192         .cache_reg = svm_cache_reg,
4193         .get_rflags = svm_get_rflags,
4194         .set_rflags = svm_set_rflags,
4195         .fpu_activate = svm_fpu_activate,
4196         .fpu_deactivate = svm_fpu_deactivate,
4197 
4198         .tlb_flush = svm_flush_tlb,
4199 
4200         .run = svm_vcpu_run,
4201         .handle_exit = handle_exit,
4202         .skip_emulated_instruction = skip_emulated_instruction,
4203         .set_interrupt_shadow = svm_set_interrupt_shadow,
4204         .get_interrupt_shadow = svm_get_interrupt_shadow,
4205         .patch_hypercall = svm_patch_hypercall,
4206         .set_irq = svm_set_irq,
4207         .set_nmi = svm_inject_nmi,
4208         .queue_exception = svm_queue_exception,
4209         .cancel_injection = svm_cancel_injection,
4210         .interrupt_allowed = svm_interrupt_allowed,
4211         .nmi_allowed = svm_nmi_allowed,
4212         .get_nmi_mask = svm_get_nmi_mask,
4213         .set_nmi_mask = svm_set_nmi_mask,
4214         .enable_nmi_window = enable_nmi_window,
4215         .enable_irq_window = enable_irq_window,
4216         .update_cr8_intercept = update_cr8_intercept,
4217 
4218         .set_tss_addr = svm_set_tss_addr,
4219         .get_tdp_level = get_npt_level,
4220         .get_mt_mask = svm_get_mt_mask,
4221 
4222         .get_exit_info = svm_get_exit_info,
4223 
4224         .get_lpage_level = svm_get_lpage_level,
4225 
4226         .cpuid_update = svm_cpuid_update,
4227 
4228         .rdtscp_supported = svm_rdtscp_supported,
4229 
4230         .set_supported_cpuid = svm_set_supported_cpuid,
4231 
4232         .has_wbinvd_exit = svm_has_wbinvd_exit,
4233 
4234         .set_tsc_khz = svm_set_tsc_khz,
4235         .write_tsc_offset = svm_write_tsc_offset,
4236         .adjust_tsc_offset = svm_adjust_tsc_offset,
4237         .compute_tsc_offset = svm_compute_tsc_offset,
4238         .read_l1_tsc = svm_read_l1_tsc,
4239 
4240         .set_tdp_cr3 = set_tdp_cr3,
4241 
4242         .check_intercept = svm_check_intercept,
4243 };
4244 
4245 static int __init svm_init(void)
4246 {
4247         return kvm_init(&svm_x86_ops, sizeof(struct vcpu_svm),
4248                         __alignof__(struct vcpu_svm), THIS_MODULE);
4249 }
4250 
4251 static void __exit svm_exit(void)
4252 {
4253         kvm_exit();
4254 }
4255 
4256 module_init(svm_init)
4257 module_exit(svm_exit)
4258 
