/*
 * hosting zSeries kernel virtual machines
 *
 * Copyright IBM Corp. 2008, 2009
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 2 only)
 * as published by the Free Software Foundation.
 *
 * Author(s): Carsten Otte <cotte@de.ibm.com>
 *            Christian Borntraeger <borntraeger@de.ibm.com>
 *            Heiko Carstens <heiko.carstens@de.ibm.com>
 *            Christian Ehrhardt <ehrhardt@de.ibm.com>
 */

#include <linux/compiler.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
#include <asm/facility.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"

#define CREATE_TRACE_POINTS
#include "trace.h"
#include "trace-s390.h"

#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU

struct kvm_stats_debugfs_item debugfs_entries[] = {
        { "userspace_handled", VCPU_STAT(exit_userspace) },
        { "exit_null", VCPU_STAT(exit_null) },
        { "exit_validity", VCPU_STAT(exit_validity) },
        { "exit_stop_request", VCPU_STAT(exit_stop_request) },
        { "exit_external_request", VCPU_STAT(exit_external_request) },
        { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
        { "exit_instruction", VCPU_STAT(exit_instruction) },
        { "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
        { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
        { "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
        { "instruction_lctl", VCPU_STAT(instruction_lctl) },
        { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
        { "deliver_external_call", VCPU_STAT(deliver_external_call) },
        { "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
        { "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
        { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
        { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
        { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
        { "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
        { "exit_wait_state", VCPU_STAT(exit_wait_state) },
        { "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
        { "instruction_stidp", VCPU_STAT(instruction_stidp) },
        { "instruction_spx", VCPU_STAT(instruction_spx) },
        { "instruction_stpx", VCPU_STAT(instruction_stpx) },
        { "instruction_stap", VCPU_STAT(instruction_stap) },
        { "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
        { "instruction_stsch", VCPU_STAT(instruction_stsch) },
        { "instruction_chsc", VCPU_STAT(instruction_chsc) },
        { "instruction_stsi", VCPU_STAT(instruction_stsi) },
        { "instruction_stfl", VCPU_STAT(instruction_stfl) },
        { "instruction_tprot", VCPU_STAT(instruction_tprot) },
        { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
        { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
        { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
        { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
        { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
        { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
        { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
        { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
        { "diagnose_10", VCPU_STAT(diagnose_10) },
        { "diagnose_44", VCPU_STAT(diagnose_44) },
        { "diagnose_9c", VCPU_STAT(diagnose_9c) },
        { NULL }
};

unsigned long *vfacilities;
static struct gmap_notifier gmap_notifier;

/* test availability of vfacility */
static inline int test_vfacility(unsigned long nr)
{
        return __test_facility(nr, (void *) vfacilities);
}

/* Section: not file related */
int kvm_arch_hardware_enable(void *garbage)
{
        /* every s390 is virtualization enabled ;-) */
        return 0;
}

void kvm_arch_hardware_disable(void *garbage)
{
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address);

int kvm_arch_hardware_setup(void)
{
        gmap_notifier.notifier_call = kvm_gmap_notifier;
        gmap_register_ipte_notifier(&gmap_notifier);
        return 0;
}

void kvm_arch_hardware_unsetup(void)
{
        gmap_unregister_ipte_notifier(&gmap_notifier);
}

void kvm_arch_check_processor_compat(void *rtn)
{
}

int kvm_arch_init(void *opaque)
{
        return 0;
}

void kvm_arch_exit(void)
{
}

/* Section: device related */
long kvm_arch_dev_ioctl(struct file *filp,
                        unsigned int ioctl, unsigned long arg)
{
        if (ioctl == KVM_S390_ENABLE_SIE)
                return s390_enable_sie();
        return -EINVAL;
}

int kvm_dev_ioctl_check_extension(long ext)
{
        int r;

        switch (ext) {
        case KVM_CAP_S390_PSW:
        case KVM_CAP_S390_GMAP:
        case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_CAP_S390_UCONTROL:
#endif
        case KVM_CAP_SYNC_REGS:
        case KVM_CAP_ONE_REG:
        case KVM_CAP_ENABLE_CAP:
        case KVM_CAP_S390_CSS_SUPPORT:
        case KVM_CAP_IOEVENTFD:
                r = 1;
                break;
        case KVM_CAP_NR_VCPUS:
        case KVM_CAP_MAX_VCPUS:
                r = KVM_MAX_VCPUS;
                break;
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_USER_MEM_SLOTS;
                break;
        case KVM_CAP_S390_COW:
                r = MACHINE_HAS_ESOP;
                break;
        default:
                r = 0;
        }
        return r;
}

/* Section: vm related */
/*
 * Get (and clear) the dirty memory log for a memory slot.
 */
int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
                               struct kvm_dirty_log *log)
{
        return 0;
}

long kvm_arch_vm_ioctl(struct file *filp,
                       unsigned int ioctl, unsigned long arg)
{
        struct kvm *kvm = filp->private_data;
        void __user *argp = (void __user *)arg;
        int r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vm(kvm, &s390int);
                break;
        }
        default:
                r = -ENOTTY;
        }

        return r;
}

int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
        int rc;
        char debug_name[16];

        rc = -EINVAL;
#ifdef CONFIG_KVM_S390_UCONTROL
        if (type & ~KVM_VM_S390_UCONTROL)
                goto out_err;
        if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
                goto out_err;
#else
        if (type)
                goto out_err;
#endif

        rc = s390_enable_sie();
        if (rc)
                goto out_err;

        rc = -ENOMEM;

        kvm->arch.sca = (struct sca_block *) get_zeroed_page(GFP_KERNEL);
        if (!kvm->arch.sca)
                goto out_err;

        sprintf(debug_name, "kvm-%u", current->pid);

        kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
        if (!kvm->arch.dbf)
                goto out_nodbf;

        spin_lock_init(&kvm->arch.float_int.lock);
        INIT_LIST_HEAD(&kvm->arch.float_int.list);

        debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
        VM_EVENT(kvm, 3, "%s", "vm created");

        if (type & KVM_VM_S390_UCONTROL) {
                kvm->arch.gmap = NULL;
        } else {
                kvm->arch.gmap = gmap_alloc(current->mm);
                if (!kvm->arch.gmap)
                        goto out_nogmap;
                kvm->arch.gmap->private = kvm;
        }

        kvm->arch.css_support = 0;

        return 0;
out_nogmap:
        debug_unregister(kvm->arch.dbf);
out_nodbf:
        free_page((unsigned long)(kvm->arch.sca));
out_err:
        return rc;
}

void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
{
        VCPU_EVENT(vcpu, 3, "%s", "free cpu");
        trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
        if (!kvm_is_ucontrol(vcpu->kvm)) {
                clear_bit(63 - vcpu->vcpu_id,
                          (unsigned long *) &vcpu->kvm->arch.sca->mcn);
                if (vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda ==
                    (__u64) vcpu->arch.sie_block)
                        vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sda = 0;
        }
        smp_mb();

        if (kvm_is_ucontrol(vcpu->kvm))
                gmap_free(vcpu->arch.gmap);

        free_page((unsigned long)(vcpu->arch.sie_block));
        kvm_vcpu_uninit(vcpu);
        kmem_cache_free(kvm_vcpu_cache, vcpu);
}

static void kvm_free_vcpus(struct kvm *kvm)
{
        unsigned int i;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_arch_vcpu_destroy(vcpu);

        mutex_lock(&kvm->lock);
        for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
                kvm->vcpus[i] = NULL;

        atomic_set(&kvm->online_vcpus, 0);
        mutex_unlock(&kvm->lock);
}

void kvm_arch_sync_events(struct kvm *kvm)
{
}

void kvm_arch_destroy_vm(struct kvm *kvm)
{
        kvm_free_vcpus(kvm);
        free_page((unsigned long)(kvm->arch.sca));
        debug_unregister(kvm->arch.dbf);
        if (!kvm_is_ucontrol(kvm))
                gmap_free(kvm->arch.gmap);
}

/* Section: vcpu related */
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
{
        if (kvm_is_ucontrol(vcpu->kvm)) {
                vcpu->arch.gmap = gmap_alloc(current->mm);
                if (!vcpu->arch.gmap)
                        return -ENOMEM;
                vcpu->arch.gmap->private = vcpu->kvm;
                return 0;
        }

        vcpu->arch.gmap = vcpu->kvm->arch.gmap;
        vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
                                    KVM_SYNC_GPRS |
                                    KVM_SYNC_ACRS |
                                    KVM_SYNC_CRS;
        return 0;
}

void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
{
        /* Nothing to do */
}

void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
        save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        save_fp_regs(vcpu->arch.host_fpregs.fprs);
        save_access_regs(vcpu->arch.host_acrs);
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        restore_access_regs(vcpu->run->s.regs.acrs);
        gmap_enable(vcpu->arch.gmap);
        atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
}

void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
        gmap_disable(vcpu->arch.gmap);
        save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);
        restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
        restore_fp_regs(vcpu->arch.host_fpregs.fprs);
        restore_access_regs(vcpu->arch.host_acrs);
}

static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
{
        /* this equals initial cpu reset in pop, but we don't switch to ESA */
        vcpu->arch.sie_block->gpsw.mask = 0UL;
        vcpu->arch.sie_block->gpsw.addr = 0UL;
        kvm_s390_set_prefix(vcpu, 0);
        vcpu->arch.sie_block->cputm = 0UL;
        vcpu->arch.sie_block->ckc = 0UL;
        vcpu->arch.sie_block->todpr = 0;
        memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
        vcpu->arch.sie_block->gcr[0] = 0xE0UL;
        vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
        vcpu->arch.guest_fpregs.fpc = 0;
        asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
        vcpu->arch.sie_block->gbea = 1;
        atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}

int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
{
        return 0;
}

int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
        atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
                                                    CPUSTAT_SM |
                                                    CPUSTAT_STOPPED |
                                                    CPUSTAT_GED);
        vcpu->arch.sie_block->ecb = 6;
        if (test_vfacility(50) && test_vfacility(73))
                vcpu->arch.sie_block->ecb |= 0x10;

        vcpu->arch.sie_block->ecb2 = 8;
        vcpu->arch.sie_block->eca = 0xC1002001U;
        vcpu->arch.sie_block->fac = (int) (long) vfacilities;
        hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvm_s390_tasklet,
                     (unsigned long) vcpu);
        vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
        get_cpu_id(&vcpu->arch.cpu_id);
        vcpu->arch.cpu_id.version = 0xff;
        return 0;
}

struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
                                      unsigned int id)
{
        struct kvm_vcpu *vcpu;
        struct sie_page *sie_page;
        int rc = -EINVAL;

        if (id >= KVM_MAX_VCPUS)
                goto out;

        rc = -ENOMEM;

        vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
        if (!vcpu)
                goto out;

        sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
        if (!sie_page)
                goto out_free_cpu;

        vcpu->arch.sie_block = &sie_page->sie_block;
        vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;

        vcpu->arch.sie_block->icpua = id;
        if (!kvm_is_ucontrol(kvm)) {
                if (!kvm->arch.sca) {
                        WARN_ON_ONCE(1);
                        goto out_free_cpu;
                }
                if (!kvm->arch.sca->cpu[id].sda)
                        kvm->arch.sca->cpu[id].sda =
                                (__u64) vcpu->arch.sie_block;
                vcpu->arch.sie_block->scaoh =
                        (__u32)(((__u64)kvm->arch.sca) >> 32);
                vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
                set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
        }

        spin_lock_init(&vcpu->arch.local_int.lock);
        INIT_LIST_HEAD(&vcpu->arch.local_int.list);
        vcpu->arch.local_int.float_int = &kvm->arch.float_int;
        spin_lock(&kvm->arch.float_int.lock);
        kvm->arch.float_int.local_int[id] = &vcpu->arch.local_int;
        vcpu->arch.local_int.wq = &vcpu->wq;
        vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
        spin_unlock(&kvm->arch.float_int.lock);

        rc = kvm_vcpu_init(vcpu, kvm, id);
        if (rc)
                goto out_free_sie_block;
        VM_EVENT(kvm, 3, "create cpu %d at %p, sie block at %p", id, vcpu,
                 vcpu->arch.sie_block);
        trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);

        return vcpu;
out_free_sie_block:
        free_page((unsigned long)(vcpu->arch.sie_block));
out_free_cpu:
        kmem_cache_free(kvm_vcpu_cache, vcpu);
out:
        return ERR_PTR(rc);
}

int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

void s390_vcpu_block(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

void s390_vcpu_unblock(struct kvm_vcpu *vcpu)
{
        atomic_clear_mask(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
}

/*
 * Kick a guest cpu out of SIE and wait until SIE is not running.
 * If the CPU is not running (e.g. waiting as idle) the function will
 * return immediately.
 */
void exit_sie(struct kvm_vcpu *vcpu)
{
        atomic_set_mask(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
        while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
                cpu_relax();
}

/* Kick a guest cpu out of SIE and prevent SIE-reentry */
void exit_sie_sync(struct kvm_vcpu *vcpu)
{
        s390_vcpu_block(vcpu);
        exit_sie(vcpu);
}

static void kvm_gmap_notifier(struct gmap *gmap, unsigned long address)
{
        int i;
        struct kvm *kvm = gmap->private;
        struct kvm_vcpu *vcpu;

        kvm_for_each_vcpu(i, vcpu, kvm) {
                /* match against both prefix pages */
                if (vcpu->arch.sie_block->prefix == (address & ~0x1000UL)) {
                        VCPU_EVENT(vcpu, 2, "gmap notifier for %lx", address);
                        kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                        exit_sie_sync(vcpu);
                }
        }
}

int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
{
        /* kvm common code refers to this, but never calls it */
        BUG();
        return 0;
}

static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = put_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = put_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = put_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = put_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
                                           struct kvm_one_reg *reg)
{
        int r = -EINVAL;

        switch (reg->id) {
        case KVM_REG_S390_TODPR:
                r = get_user(vcpu->arch.sie_block->todpr,
                             (u32 __user *)reg->addr);
                break;
        case KVM_REG_S390_EPOCHDIFF:
                r = get_user(vcpu->arch.sie_block->epoch,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CPU_TIMER:
                r = get_user(vcpu->arch.sie_block->cputm,
                             (u64 __user *)reg->addr);
                break;
        case KVM_REG_S390_CLOCK_COMP:
                r = get_user(vcpu->arch.sie_block->ckc,
                             (u64 __user *)reg->addr);
                break;
        default:
                break;
        }

        return r;
}

static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
{
        kvm_s390_vcpu_initial_reset(vcpu);
        return 0;
}

int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
{
        memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
        memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
        restore_access_regs(vcpu->run->s.regs.acrs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
                                  struct kvm_sregs *sregs)
{
        memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
        memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
        return 0;
}

int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        if (test_fp_ctl(fpu->fpc))
                return -EINVAL;
        memcpy(&vcpu->arch.guest_fpregs.fprs, &fpu->fprs, sizeof(fpu->fprs));
        vcpu->arch.guest_fpregs.fpc = fpu->fpc;
        restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
        return 0;
}

int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
{
        memcpy(&fpu->fprs, &vcpu->arch.guest_fpregs.fprs, sizeof(fpu->fprs));
        fpu->fpc = vcpu->arch.guest_fpregs.fpc;
        return 0;
}

static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
{
        int rc = 0;

        if (!(atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_STOPPED))
                rc = -EBUSY;
        else {
                vcpu->run->psw_mask = psw.mask;
                vcpu->run->psw_addr = psw.addr;
        }
        return rc;
}

int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
                                  struct kvm_translation *tr)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
                                        struct kvm_guest_debug *dbg)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}

int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
                                    struct kvm_mp_state *mp_state)
{
        return -EINVAL; /* not implemented yet */
}

static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
{
        /*
         * We use MMU_RELOAD just to re-arm the ipte notifier for the
         * guest prefix page. gmap_ipte_notify will wait on the ptl lock.
         * This ensures that the ipte instruction for this request has
         * already finished. We might race against a second unmapper that
We might race against a second unmapper that 689 * wants to set the blocking bit. Lets just retry the request loop. 690 */ 691 while (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) { 692 int rc; 693 rc = gmap_ipte_notify(vcpu->arch.gmap, 694 vcpu->arch.sie_block->prefix, 695 PAGE_SIZE * 2); 696 if (rc) 697 return rc; 698 s390_vcpu_unblock(vcpu); 699 } 700 return 0; 701 } 702 703 static int vcpu_pre_run(struct kvm_vcpu *vcpu) 704 { 705 int rc, cpuflags; 706 707 memcpy(&vcpu->arch.sie_block->gg14, &vcpu->run->s.regs.gprs[14], 16); 708 709 if (need_resched()) 710 schedule(); 711 712 if (test_thread_flag(TIF_MCCK_PENDING)) 713 s390_handle_mcck(); 714 715 if (!kvm_is_ucontrol(vcpu->kvm)) 716 kvm_s390_deliver_pending_interrupts(vcpu); 717 718 rc = kvm_s390_handle_requests(vcpu); 719 if (rc) 720 return rc; 721 722 vcpu->arch.sie_block->icptcode = 0; 723 cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags); 724 VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags); 725 trace_kvm_s390_sie_enter(vcpu, cpuflags); 726 727 return 0; 728 } 729 730 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) 731 { 732 int rc; 733 734 VCPU_EVENT(vcpu, 6, "exit sie icptcode %d", 735 vcpu->arch.sie_block->icptcode); 736 trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode); 737 738 if (exit_reason >= 0) { 739 rc = 0; 740 } else if (kvm_is_ucontrol(vcpu->kvm)) { 741 vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; 742 vcpu->run->s390_ucontrol.trans_exc_code = 743 current->thread.gmap_addr; 744 vcpu->run->s390_ucontrol.pgm_code = 0x10; 745 rc = -EREMOTE; 746 } else { 747 VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); 748 trace_kvm_s390_sie_fault(vcpu); 749 rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); 750 } 751 752 memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); 753 754 if (rc == 0) { 755 if (kvm_is_ucontrol(vcpu->kvm)) 756 /* Don't exit for host interrupts. */ 757 rc = vcpu->arch.sie_block->icptcode ? -EOPNOTSUPP : 0; 758 else 759 rc = kvm_handle_sie_intercept(vcpu); 760 } 761 762 return rc; 763 } 764 765 static int __vcpu_run(struct kvm_vcpu *vcpu) 766 { 767 int rc, exit_reason; 768 769 /* 770 * We try to hold kvm->srcu during most of vcpu_run (except when run- 771 * ning the guest), so that memslots (and other stuff) are protected 772 */ 773 vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); 774 775 do { 776 rc = vcpu_pre_run(vcpu); 777 if (rc) 778 break; 779 780 srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); 781 /* 782 * As PF_VCPU will be used in fault handler, between 783 * guest_enter and guest_exit should be no uaccess. 
                 */
                preempt_disable();
                kvm_guest_enter();
                preempt_enable();
                exit_reason = sie64a(vcpu->arch.sie_block,
                                     vcpu->run->s.regs.gprs);
                kvm_guest_exit();
                vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);

                rc = vcpu_post_run(vcpu, exit_reason);
        } while (!signal_pending(current) && !rc);

        srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
        return rc;
}

int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
        int rc;
        sigset_t sigsaved;

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);

        atomic_clear_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);

        BUG_ON(vcpu->kvm->arch.float_int.local_int[vcpu->vcpu_id] == NULL);

        switch (kvm_run->exit_reason) {
        case KVM_EXIT_S390_SIEIC:
        case KVM_EXIT_UNKNOWN:
        case KVM_EXIT_INTR:
        case KVM_EXIT_S390_RESET:
        case KVM_EXIT_S390_UCONTROL:
        case KVM_EXIT_S390_TSCH:
                break;
        default:
                BUG();
        }

        vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
        vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX) {
                kvm_run->kvm_dirty_regs &= ~KVM_SYNC_PREFIX;
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        }
        if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
                kvm_run->kvm_dirty_regs &= ~KVM_SYNC_CRS;
                memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
                kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
        }

        might_fault();
        rc = __vcpu_run(vcpu);

        if (signal_pending(current) && !rc) {
                kvm_run->exit_reason = KVM_EXIT_INTR;
                rc = -EINTR;
        }

        if (rc == -EOPNOTSUPP) {
                /* intercept cannot be handled in-kernel, prepare kvm-run */
                kvm_run->exit_reason = KVM_EXIT_S390_SIEIC;
                kvm_run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
                kvm_run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
                kvm_run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
                rc = 0;
        }

        if (rc == -EREMOTE) {
                /* intercept was handled, but userspace support is needed
                 * kvm_run has been prepared by the handler */
                rc = 0;
        }

        kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
        kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
        kvm_run->s.regs.prefix = vcpu->arch.sie_block->prefix;
        memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);

        if (vcpu->sigset_active)
                sigprocmask(SIG_SETMASK, &sigsaved, NULL);

        vcpu->stat.exit_userspace++;
        return rc;
}

static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
                       unsigned long n, int prefix)
{
        if (prefix)
                return copy_to_guest(vcpu, guestdest, from, n);
        else
                return copy_to_guest_absolute(vcpu, guestdest, from, n);
}

/*
 * store status at address
 * we have two special cases:
 * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
 * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
 */
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
{
        unsigned char archmode = 1;
        int prefix;
        u64 clkcomp;

        if (addr == KVM_S390_STORE_STATUS_NOADDR) {
                if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
                        return -EFAULT;
                addr = SAVE_AREA_BASE;
                prefix = 0;
        } else if (addr == KVM_S390_STORE_STATUS_PREFIXED) {
                if (copy_to_guest(vcpu, 163ul, &archmode, 1))
                        return -EFAULT;
                addr = SAVE_AREA_BASE;
                prefix = 1;
        } else
                prefix = 0;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
                        vcpu->arch.guest_fpregs.fprs, 128, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, gp_regs),
                        vcpu->run->s.regs.gprs, 128, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, psw),
                        &vcpu->arch.sie_block->gpsw, 16, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, pref_reg),
                        &vcpu->arch.sie_block->prefix, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu,
                        addr + offsetof(struct save_area, fp_ctrl_reg),
                        &vcpu->arch.guest_fpregs.fpc, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, tod_reg),
                        &vcpu->arch.sie_block->todpr, 4, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, timer),
                        &vcpu->arch.sie_block->cputm, 8, prefix))
                return -EFAULT;

        clkcomp = vcpu->arch.sie_block->ckc >> 8;
        if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
                        &clkcomp, 8, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
                        &vcpu->run->s.regs.acrs, 64, prefix))
                return -EFAULT;

        if (__guestcopy(vcpu,
                        addr + offsetof(struct save_area, ctrl_regs),
                        &vcpu->arch.sie_block->gcr, 128, prefix))
                return -EFAULT;
        return 0;
}

int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
{
        /*
         * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
         * copying in vcpu load/put. Lets update our copies before we save
         * it into the save area
         */
        save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
        save_fp_regs(vcpu->arch.guest_fpregs.fprs);
        save_access_regs(vcpu->run->s.regs.acrs);

        return kvm_s390_store_status_unloaded(vcpu, addr);
}

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                                     struct kvm_enable_cap *cap)
{
        int r;

        if (cap->flags)
                return -EINVAL;

        switch (cap->cap) {
        case KVM_CAP_S390_CSS_SUPPORT:
                if (!vcpu->kvm->arch.css_support) {
                        vcpu->kvm->arch.css_support = 1;
                        trace_kvm_s390_enable_css(vcpu->kvm);
                }
                r = 0;
                break;
        default:
                r = -EINVAL;
                break;
        }
        return r;
}

long kvm_arch_vcpu_ioctl(struct file *filp,
                         unsigned int ioctl, unsigned long arg)
{
        struct kvm_vcpu *vcpu = filp->private_data;
        void __user *argp = (void __user *)arg;
        int idx;
        long r;

        switch (ioctl) {
        case KVM_S390_INTERRUPT: {
                struct kvm_s390_interrupt s390int;

                r = -EFAULT;
                if (copy_from_user(&s390int, argp, sizeof(s390int)))
                        break;
                r = kvm_s390_inject_vcpu(vcpu, &s390int);
                break;
        }
        case KVM_S390_STORE_STATUS:
                idx = srcu_read_lock(&vcpu->kvm->srcu);
                r = kvm_s390_vcpu_store_status(vcpu, arg);
                srcu_read_unlock(&vcpu->kvm->srcu, idx);
                break;
        case KVM_S390_SET_INITIAL_PSW: {
                psw_t psw;

                r = -EFAULT;
                if (copy_from_user(&psw, argp, sizeof(psw)))
                        break;
                r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
                break;
        }
        case KVM_S390_INITIAL_RESET:
                r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
                break;
        case KVM_SET_ONE_REG:
        case KVM_GET_ONE_REG: {
                struct kvm_one_reg reg;
                r = -EFAULT;
                if (copy_from_user(&reg, argp, sizeof(reg)))
                        break;
                if (ioctl == KVM_SET_ONE_REG)
                        r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
                else
                        r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
                break;
        }
#ifdef CONFIG_KVM_S390_UCONTROL
        case KVM_S390_UCAS_MAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
                                     ucasmap.vcpu_addr, ucasmap.length);
                break;
        }
        case KVM_S390_UCAS_UNMAP: {
                struct kvm_s390_ucas_mapping ucasmap;

                if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
                        r = -EFAULT;
                        break;
                }

                if (!kvm_is_ucontrol(vcpu->kvm)) {
                        r = -EINVAL;
                        break;
                }

                r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
                                       ucasmap.length);
                break;
        }
#endif
        case KVM_S390_VCPU_FAULT: {
                r = gmap_fault(arg, vcpu->arch.gmap);
                if (!IS_ERR_VALUE(r))
                        r = 0;
                break;
        }
        case KVM_ENABLE_CAP:
        {
                struct kvm_enable_cap cap;
                r = -EFAULT;
                if (copy_from_user(&cap, argp, sizeof(cap)))
                        break;
                r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
                break;
        }
        default:
                r = -ENOTTY;
        }
        return r;
}

int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
{
#ifdef CONFIG_KVM_S390_UCONTROL
        if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
            && (kvm_is_ucontrol(vcpu->kvm))) {
                vmf->page = virt_to_page(vcpu->arch.sie_block);
                get_page(vmf->page);
                return 0;
        }
#endif
        return VM_FAULT_SIGBUS;
}

void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
                           struct kvm_memory_slot *dont)
{
}

int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
                            unsigned long npages)
{
        return 0;
}

void kvm_arch_memslots_updated(struct kvm *kvm)
{
}

/* Section: memory related */
int kvm_arch_prepare_memory_region(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot,
                                   struct kvm_userspace_memory_region *mem,
                                   enum kvm_mr_change change)
{
        /* A few sanity checks. We can have memory slots which have to be
           located/ended at a segment boundary (1MB). The memory in userland is
           ok to be fragmented into various different vmas. It is okay to mmap()
           and munmap() stuff in this slot after doing this call at any time */

        if (mem->userspace_addr & 0xffffful)
                return -EINVAL;

        if (mem->memory_size & 0xffffful)
                return -EINVAL;

        return 0;
}

void kvm_arch_commit_memory_region(struct kvm *kvm,
                                   struct kvm_userspace_memory_region *mem,
                                   const struct kvm_memory_slot *old,
                                   enum kvm_mr_change change)
{
        int rc;

        /* If the basics of the memslot do not change, we do not want
         * to update the gmap. Every update causes several unnecessary
         * segment translation exceptions. This is usually handled just
         * fine by the normal fault handler + gmap, but it will also
         * cause faults on the prefix page of running guest CPUs.
         */
        if (old->userspace_addr == mem->userspace_addr &&
            old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
            old->npages * PAGE_SIZE == mem->memory_size)
                return;

        rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
                              mem->guest_phys_addr, mem->memory_size);
        if (rc)
                printk(KERN_WARNING "kvm-s390: failed to commit memory region\n");
        return;
}

void kvm_arch_flush_shadow_all(struct kvm *kvm)
{
}

void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
                                   struct kvm_memory_slot *slot)
{
}

static int __init kvm_s390_init(void)
{
        int ret;
        ret = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
        if (ret)
                return ret;

        /*
         * guests can ask for up to 255+1 double words, we need a full page
         * to hold the maximum amount of facilities. On the other hand, we
         * only set facilities that are known to work in KVM.
         */
        vfacilities = (unsigned long *) get_zeroed_page(GFP_KERNEL|GFP_DMA);
        if (!vfacilities) {
                kvm_exit();
                return -ENOMEM;
        }
        memcpy(vfacilities, S390_lowcore.stfle_fac_list, 16);
        vfacilities[0] &= 0xff82fff3f4fc2000UL;
        vfacilities[1] &= 0x005c000000000000UL;
        return 0;
}

static void __exit kvm_s390_exit(void)
{
        free_page((unsigned long) vfacilities);
        kvm_exit();
}

module_init(kvm_s390_init);
module_exit(kvm_s390_exit);

/*
 * Enable autoloading of the kvm module.
 * Note that we add the module alias here instead of virt/kvm/kvm_main.c
 * since x86 takes a different approach.
 */
#include <linux/miscdevice.h>
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");