// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
 * Copyright © 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * KVM Xen emulation
 */

#include "x86.h"
#include "xen.h"
#include "hyperv.h"

#include <linux/kvm_host.h>
#include <linux/sched/stat.h>

#include <trace/events/kvm.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "trace.h"

DEFINE_STATIC_KEY_DEFERRED_FALSE(kvm_xen_enabled, HZ);

static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
{
	gpa_t gpa = gfn_to_gpa(gfn);
	int wc_ofs, sec_hi_ofs;
	int ret;
	int idx = srcu_read_lock(&kvm->srcu);

	ret = kvm_gfn_to_hva_cache_init(kvm, &kvm->arch.xen.shinfo_cache,
					gpa, PAGE_SIZE);
	if (ret)
		goto out;

	kvm->arch.xen.shinfo_set = true;

	/* Paranoia checks on the 32-bit struct layout */
	BUILD_BUG_ON(offsetof(struct compat_shared_info, wc) != 0x900);
	BUILD_BUG_ON(offsetof(struct compat_shared_info, arch.wc_sec_hi) != 0x924);
	BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0);

	/* 32-bit location by default */
	wc_ofs = offsetof(struct compat_shared_info, wc);
	sec_hi_ofs = offsetof(struct compat_shared_info, arch.wc_sec_hi);

#ifdef CONFIG_X86_64
	/* Paranoia checks on the 64-bit struct layout */
	BUILD_BUG_ON(offsetof(struct shared_info, wc) != 0xc00);
	BUILD_BUG_ON(offsetof(struct shared_info, wc_sec_hi) != 0xc0c);

	if (kvm->arch.xen.long_mode) {
		wc_ofs = offsetof(struct shared_info, wc);
		sec_hi_ofs = offsetof(struct shared_info, wc_sec_hi);
	}
#endif

	kvm_write_wall_clock(kvm, gpa + wc_ofs, sec_hi_ofs - wc_ofs);
	kvm_make_all_cpus_request(kvm, KVM_REQ_MASTERCLOCK_UPDATE);

out:
	srcu_read_unlock(&kvm->srcu, idx);
	return ret;
}

static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	u64 now = get_kvmclock_ns(v->kvm);
	u64 delta_ns = now - vx->runstate_entry_time;
	u64 run_delay = current->sched_info.run_delay;

	if (unlikely(!vx->runstate_entry_time))
		vx->current_runstate = RUNSTATE_offline;

	/*
	 * Time waiting for the scheduler isn't "stolen" if the
	 * vCPU wasn't running anyway.
	 */
	if (vx->current_runstate == RUNSTATE_running) {
		u64 steal_ns = run_delay - vx->last_steal;

		delta_ns -= steal_ns;

		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
	}
	vx->last_steal = run_delay;

	vx->runstate_times[vx->current_runstate] += delta_ns;
	vx->current_runstate = state;
	vx->runstate_entry_time = now;
}

void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
{
	struct kvm_vcpu_xen *vx = &v->arch.xen;
	uint64_t state_entry_time;
	unsigned int offset;

	kvm_xen_update_runstate(v, state);

	if (!vx->runstate_set)
		return;

	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);

	offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
#ifdef CONFIG_X86_64
	/*
	 * The only difference is alignment of uint64_t in 32-bit.
	 * So the first field 'state' is accessed directly using
	 * offsetof() (where its offset happens to be zero), while the
	 * remaining fields which are all uint64_t, start at 'offset'
	 * which we tweak here by adding 4.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) + 4);

	if (v->kvm->arch.xen.long_mode)
		offset = offsetof(struct vcpu_runstate_info, state_entry_time);
#endif
	/*
	 * First write the updated state_entry_time at the appropriate
	 * location determined by 'offset'.
	 */
	state_entry_time = vx->runstate_entry_time;
	state_entry_time |= XEN_RUNSTATE_UPDATE;

	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
		     sizeof(state_entry_time));
	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
		     sizeof(state_entry_time));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
	smp_wmb();

	/*
	 * Next, write the new runstate. This is in the *same* place
	 * for 32-bit and 64-bit guests, asserted here for paranoia.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
		     offsetof(struct compat_vcpu_runstate_info, state));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
		     sizeof(vx->current_runstate));
	BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
		     sizeof(vx->current_runstate));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->current_runstate,
					  offsetof(struct vcpu_runstate_info, state),
					  sizeof(vx->current_runstate)))
		return;

	/*
	 * Write the actual runstate times immediately after the
	 * runstate_entry_time.
	 */
	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
		     sizeof(((struct compat_vcpu_runstate_info *)0)->time));
	BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
		     sizeof(vx->runstate_times));

	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &vx->runstate_times[0],
					  offset + sizeof(u64),
					  sizeof(vx->runstate_times)))
		return;

	smp_wmb();

	/*
	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
	 * runstate_entry_time field.
	 */

	state_entry_time &= ~XEN_RUNSTATE_UPDATE;
	if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
					  &state_entry_time, offset,
					  sizeof(state_entry_time)))
		return;
}

int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
{
	int err;
	u8 rc = 0;

	/*
	 * If the global upcall vector (HVMIRQ_callback_vector) is set and
	 * the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
	 */
	struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
	struct kvm_memslots *slots = kvm_memslots(v->kvm);
	unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);

	/* No need for compat handling here */
	BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
		     offsetof(struct compat_vcpu_info, evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof(((struct vcpu_info *)0)->evtchn_upcall_pending));
	BUILD_BUG_ON(sizeof(rc) !=
		     sizeof(((struct compat_vcpu_info *)0)->evtchn_upcall_pending));

	/*
	 * For efficiency, this mirrors the checks for using the valid
	 * cache in kvm_read_guest_offset_cached(), but just uses
	 * __get_user() instead. And falls back to the slow path.
	 */
	if (likely(slots->generation == ghc->generation &&
		   !kvm_is_error_hva(ghc->hva) && ghc->memslot)) {
		/* Fast path */
		pagefault_disable();
		err = __get_user(rc, (u8 __user *)ghc->hva + offset);
		pagefault_enable();
		if (!err)
			return rc;
	}

	/* Slow path */

	/*
	 * This function gets called from kvm_vcpu_block() after setting the
	 * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
	 * from a HLT. So we really mustn't sleep. If the page ended up absent
	 * at that point, just return 1 in order to trigger an immediate wake,
	 * and we'll end up getting called again from a context where we *can*
	 * fault in the page and wait for it.
	 */
	if (in_atomic() || !task_is_running(current))
		return 1;

	kvm_read_guest_offset_cached(v->kvm, ghc, &rc, offset,
				     sizeof(rc));

	return rc;
}

int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		if (!IS_ENABLED(CONFIG_64BIT) && data->u.long_mode) {
			r = -EINVAL;
		} else {
			kvm->arch.xen.long_mode = !!data->u.long_mode;
			r = 0;
		}
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		if (data->u.shared_info.gfn == GPA_INVALID) {
			kvm->arch.xen.shinfo_set = false;
			r = 0;
			break;
		}
		r = kvm_xen_shared_info_init(kvm, data->u.shared_info.gfn);
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		if (data->u.vector && data->u.vector < 0x10)
			r = -EINVAL;
		else {
			kvm->arch.xen.upcall_vector = data->u.vector;
			r = 0;
		}
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&kvm->lock);

	switch (data->type) {
	case KVM_XEN_ATTR_TYPE_LONG_MODE:
		data->u.long_mode = kvm->arch.xen.long_mode;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_SHARED_INFO:
		if (kvm->arch.xen.shinfo_set)
			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
		else
			data->u.shared_info.gfn = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_ATTR_TYPE_UPCALL_VECTOR:
		data->u.vector = kvm->arch.xen.upcall_vector;
		r = 0;
		break;

	default:
		break;
	}

	mutex_unlock(&kvm->lock);
	return r;
}

int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int idx, r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);
	idx = srcu_read_lock(&vcpu->kvm->srcu);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		/* No compat necessary here. */
		BUILD_BUG_ON(sizeof(struct vcpu_info) !=
			     sizeof(struct compat_vcpu_info));
		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
			     offsetof(struct compat_vcpu_info, time));

		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_info_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_info));
		if (!r) {
			vcpu->arch.xen.vcpu_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.vcpu_time_info_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.vcpu_time_info_cache,
					      data->u.gpa,
					      sizeof(struct pvclock_vcpu_time_info));
		if (!r) {
			vcpu->arch.xen.vcpu_time_info_set = true;
			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.gpa == GPA_INVALID) {
			vcpu->arch.xen.runstate_set = false;
			r = 0;
			break;
		}

		r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
					      &vcpu->arch.xen.runstate_cache,
					      data->u.gpa,
					      sizeof(struct vcpu_runstate_info));
		if (!r)
			vcpu->arch.xen.runstate_set = true;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}

		kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline) {
			r = -EINVAL;
			break;
		}
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}
		if (get_kvmclock_ns(vcpu->kvm) <
		    data->u.runstate.state_entry_time) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.current_runstate = data->u.runstate.state;
		vcpu->arch.xen.runstate_entry_time =
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] =
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
			data->u.runstate.time_offline;
		vcpu->arch.xen.last_steal = current->sched_info.run_delay;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (data->u.runstate.state > RUNSTATE_offline &&
		    data->u.runstate.state != (u64)-1) {
			r = -EINVAL;
			break;
		}
		/* The adjustment must add up */
		if (data->u.runstate.state_entry_time !=
		    (data->u.runstate.time_running +
		     data->u.runstate.time_runnable +
		     data->u.runstate.time_blocked +
		     data->u.runstate.time_offline)) {
			r = -EINVAL;
			break;
		}

		if (get_kvmclock_ns(vcpu->kvm) <
		    (vcpu->arch.xen.runstate_entry_time +
		     data->u.runstate.state_entry_time)) {
			r = -EINVAL;
			break;
		}

		vcpu->arch.xen.runstate_entry_time +=
			data->u.runstate.state_entry_time;
		vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
			data->u.runstate.time_running;
		vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
			data->u.runstate.time_runnable;
		vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
			data->u.runstate.time_blocked;
		vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
			data->u.runstate.time_offline;

		if (data->u.runstate.state <= RUNSTATE_offline)
			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
		r = 0;
		break;

	default:
		break;
	}

	srcu_read_unlock(&vcpu->kvm->srcu, idx);
	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
{
	int r = -ENOENT;

	mutex_lock(&vcpu->kvm->lock);

	switch (data->type) {
	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
		if (vcpu->arch.xen.vcpu_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
		if (vcpu->arch.xen.vcpu_time_info_set)
			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
		else
			data->u.gpa = GPA_INVALID;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		if (vcpu->arch.xen.runstate_set) {
			data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
			r = 0;
		}
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
		if (!sched_info_on()) {
			r = -EOPNOTSUPP;
			break;
		}
		data->u.runstate.state = vcpu->arch.xen.current_runstate;
		data->u.runstate.state_entry_time =
			vcpu->arch.xen.runstate_entry_time;
		data->u.runstate.time_running =
			vcpu->arch.xen.runstate_times[RUNSTATE_running];
		data->u.runstate.time_runnable =
			vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
		data->u.runstate.time_blocked =
			vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
		data->u.runstate.time_offline =
			vcpu->arch.xen.runstate_times[RUNSTATE_offline];
		r = 0;
		break;

	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
		r = -EINVAL;
		break;

	default:
		break;
	}

	mutex_unlock(&vcpu->kvm->lock);
	return r;
}

int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
{
	struct kvm *kvm = vcpu->kvm;
	u32 page_num = data & ~PAGE_MASK;
	u64 page_addr = data & PAGE_MASK;
	bool lm = is_long_mode(vcpu);

	/* Latch long_mode for shared_info pages etc. */
	vcpu->kvm->arch.xen.long_mode = lm;

	/*
	 * If Xen hypercall intercept is enabled, fill the hypercall
	 * page with VMCALL/VMMCALL instructions since that's what
	 * we catch. Else the VMM has provided the hypercall pages
	 * with instructions of its own choosing, so use those.
	 */
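	/*
	 * Each generated stub below is 32 bytes: a 5-byte "mov $<nr>, %eax"
	 * carrying the hypercall number, the VMCALL/VMMCALL patched in at
	 * offset 5, a "ret" at offset 8, and INT3 padding to the end.
	 */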
	if (kvm_xen_hypercall_enabled(kvm)) {
		u8 instructions[32];
		int i;

		if (page_num)
			return 1;

		/* mov imm32, %eax */
		instructions[0] = 0xb8;

		/* vmcall / vmmcall */
		kvm_x86_ops.patch_hypercall(vcpu, instructions + 5);

		/* ret */
		instructions[8] = 0xc3;

		/* int3 to pad */
		memset(instructions + 9, 0xcc, sizeof(instructions) - 9);

		for (i = 0; i < PAGE_SIZE / sizeof(instructions); i++) {
			*(u32 *)&instructions[1] = i;
			if (kvm_vcpu_write_guest(vcpu,
						 page_addr + (i * sizeof(instructions)),
						 instructions, sizeof(instructions)))
				return 1;
		}
	} else {
		/*
		 * Note, truncation is a non-issue as 'lm' is guaranteed to be
		 * false for a 32-bit kernel, i.e. when hva_t is only 4 bytes.
		 */
		hva_t blob_addr = lm ? kvm->arch.xen_hvm_config.blob_addr_64
				     : kvm->arch.xen_hvm_config.blob_addr_32;
		u8 blob_size = lm ? kvm->arch.xen_hvm_config.blob_size_64
				  : kvm->arch.xen_hvm_config.blob_size_32;
		u8 *page;

		if (page_num >= blob_size)
			return 1;

		blob_addr += page_num * PAGE_SIZE;

		page = memdup_user((u8 __user *)blob_addr, PAGE_SIZE);
		if (IS_ERR(page))
			return PTR_ERR(page);

		if (kvm_vcpu_write_guest(vcpu, page_addr, page, PAGE_SIZE)) {
			kfree(page);
			return 1;
		}

		/* Don't leak the temporary copy on the success path either. */
		kfree(page);
	}
	return 0;
}

int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc)
{
	if (xhc->flags & ~KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL)
		return -EINVAL;

	/*
	 * With hypercall interception the kernel generates its own
	 * hypercall page so it must not be provided.
	 */
	if ((xhc->flags & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL) &&
	    (xhc->blob_addr_32 || xhc->blob_addr_64 ||
	     xhc->blob_size_32 || xhc->blob_size_64))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (xhc->msr && !kvm->arch.xen_hvm_config.msr)
		static_branch_inc(&kvm_xen_enabled.key);
	else if (!xhc->msr && kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);

	memcpy(&kvm->arch.xen_hvm_config, xhc, sizeof(*xhc));

	mutex_unlock(&kvm->lock);
	return 0;
}

void kvm_xen_destroy_vm(struct kvm *kvm)
{
	if (kvm->arch.xen_hvm_config.msr)
		static_branch_slow_dec_deferred(&kvm_xen_enabled);
}

static int kvm_xen_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
{
	kvm_rax_write(vcpu, result);
	return kvm_skip_emulated_instruction(vcpu);
}

static int kvm_xen_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
{
	struct kvm_run *run = vcpu->run;

	if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.xen.hypercall_rip)))
		return 1;

	return kvm_xen_hypercall_set_result(vcpu, run->xen.u.hcall.result);
}

int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
{
	bool longmode;
	u64 input, params[6];

	input = (u64)kvm_register_read(vcpu, VCPU_REGS_RAX);

	/* Hyper-V hypercalls get bit 31 set in EAX */
	if ((input & 0x80000000) &&
	    kvm_hv_hypercall_enabled(vcpu))
		return kvm_hv_hypercall(vcpu);

	longmode = is_64_bit_mode(vcpu);
	if (!longmode) {
		/* 32-bit Xen hypercall ABI: args in %ebx, %ecx, %edx, %esi, %edi, %ebp */
		params[0] = (u32)kvm_rbx_read(vcpu);
		params[1] = (u32)kvm_rcx_read(vcpu);
		params[2] = (u32)kvm_rdx_read(vcpu);
		params[3] = (u32)kvm_rsi_read(vcpu);
		params[4] = (u32)kvm_rdi_read(vcpu);
		params[5] = (u32)kvm_rbp_read(vcpu);
	}
#ifdef CONFIG_X86_64
	else {
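		/* 64-bit Xen hypercall ABI: args in %rdi, %rsi, %rdx, %r10, %r8, %r9 */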
		params[0] = (u64)kvm_rdi_read(vcpu);
		params[1] = (u64)kvm_rsi_read(vcpu);
		params[2] = (u64)kvm_rdx_read(vcpu);
		params[3] = (u64)kvm_r10_read(vcpu);
		params[4] = (u64)kvm_r8_read(vcpu);
		params[5] = (u64)kvm_r9_read(vcpu);
	}
#endif
	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
				params[3], params[4], params[5]);

	vcpu->run->exit_reason = KVM_EXIT_XEN;
	vcpu->run->xen.type = KVM_EXIT_XEN_HCALL;
	vcpu->run->xen.u.hcall.longmode = longmode;
	vcpu->run->xen.u.hcall.cpl = kvm_x86_ops.get_cpl(vcpu);
	vcpu->run->xen.u.hcall.input = input;
	vcpu->run->xen.u.hcall.params[0] = params[0];
	vcpu->run->xen.u.hcall.params[1] = params[1];
	vcpu->run->xen.u.hcall.params[2] = params[2];
	vcpu->run->xen.u.hcall.params[3] = params[3];
	vcpu->run->xen.u.hcall.params[4] = params[4];
	vcpu->run->xen.u.hcall.params[5] = params[5];
	vcpu->arch.xen.hypercall_rip = kvm_get_linear_rip(vcpu);
	vcpu->arch.complete_userspace_io =
		kvm_xen_hypercall_complete_userspace;

	return 0;
}