KVM fixes for 4.19-rc3

ARM:
  - Fix a VFP corruption in 32-bit guest
  - Add missing cache invalidation for CoW pages
  - Two small cleanups
 
 s390:
  - Fallout from the hugetlbfs support: pfmf interpretion and locking
  - VSIE: fix keywrapping for nested guests
 
 PPC:
  - Fix a bug where pages might not get marked dirty, causing
    guest memory corruption on migration,
  - Fix a bug causing reads from guest memory to use the wrong guest
    real address for very large HPT guests (>256G of memory), leading to
    failures in instruction emulation.
 
 x86:
  - Fix out of bound access from malicious pv ipi hypercalls (introduced
    in rc1)
  - Fix delivery of pending interrupts when entering a nested guest,
    preventing arbitrarily late injection
  - Sanitize kvm_stat output after destroying a guest
  - Fix infinite loop when emulating a nested guest page fault
    and improve the surrounding emulation code
  - Two minor cleanups
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABCAAGBQJbk5gAAAoJEED/6hsPKofoS0UH/1clCzg/8x3jhpDcKKp6tDm7
 9XHOOQ6XmydT0HXYJNqZepGNqU99ip+2u4x8E9LCT5MTvTMZ1BcNM6PmenjJVULY
 GMJtwZhjqoklrOcNkXGqIye4Ec+I0pBuMmt0AN0N85CcHO8VUBpMzsdxgJLuxcRm
 UT6OZnCLyJsock6BqkZmqVsJj/gemFnI9MpudnrU8cCFk60roXmQWJ66fMIFfKjt
 q0R61t8nmbapQKE8pjqBNgbCsuotVOtU1zgMkeM5LkaYEfc65ZPdgt3sdpyG8Guq
 WA7Vt6HEvmNrcQxHFX5P0GxTVM9lOVCUx1bKXE4+57CMZOYl/8hDaTudlcacutg=
 =FyuN
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:
   - Fix a VFP corruption in 32-bit guest
   - Add missing cache invalidation for CoW pages
   - Two small cleanups

  s390:
   - Fallout from the hugetlbfs support: pfmf interpretion and locking
   - VSIE: fix keywrapping for nested guests

  PPC:
   - Fix a bug where pages might not get marked dirty, causing guest
     memory corruption on migration
   - Fix a bug causing reads from guest memory to use the wrong guest
     real address for very large HPT guests (>256G of memory), leading
     to failures in instruction emulation.

  x86:
   - Fix out of bound access from malicious pv ipi hypercalls
     (introduced in rc1)
   - Fix delivery of pending interrupts when entering a nested guest,
     preventing arbitrarily late injection
   - Sanitize kvm_stat output after destroying a guest
   - Fix infinite loop when emulating a nested guest page fault and
     improve the surrounding emulation code
   - Two minor cleanups"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  KVM: LAPIC: Fix pv ipis out-of-bounds access
  KVM: nVMX: Fix loss of pending IRQ/NMI before entering L2
  arm64: KVM: Remove pgd_lock
  KVM: Remove obsolete kvm_unmap_hva notifier backend
  arm64: KVM: Only force FPEXC32_EL2.EN if trapping FPSIMD
  KVM: arm/arm64: Clean dcache to PoC when changing PTE due to CoW
  KVM: s390: Properly lock mm context allow_gmap_hpage_1m setting
  KVM: s390: vsie: copy wrapping keys to right place
  KVM: s390: Fix pfmf and conditional skey emulation
  tools/kvm_stat: re-animate display of dead guests
  tools/kvm_stat: indicate dead guests as such
  tools/kvm_stat: handle guest removals more gracefully
  tools/kvm_stat: don't reset stats when setting PID filter for debugfs
  tools/kvm_stat: fix updates for dead guests
  tools/kvm_stat: fix handling of invalid paths in debugfs provider
  tools/kvm_stat: fix python3 issues
  KVM: x86: Unexport x86_emulate_instruction()
  KVM: x86: Rename emulate_instruction() to kvm_emulate_instruction()
  KVM: x86: Do not re-{try,execute} after failed emulation in L2
  KVM: x86: Default to not allowing emulation retry in kvm_mmu_page_fault
  ...
This commit is contained in:
Linus Torvalds 2018-09-08 15:52:45 -07:00
commit f8f65382c9
21 changed files with 204 additions and 134 deletions

View File

@ -223,7 +223,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);

View File

@ -61,8 +61,7 @@ struct kvm_arch {
u64 vmid_gen;
u32 vmid;
/* 1-level 2nd stage table and lock */
spinlock_t pgd_lock;
/* 1-level 2nd stage table, protected by kvm->mmu_lock */
pgd_t *pgd;
/* VTTBR value associated with above pgd and vmid */
@ -357,7 +356,6 @@ int __kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu,
struct kvm_vcpu_events *events);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);

View File

@ -98,8 +98,10 @@ static void activate_traps_vhe(struct kvm_vcpu *vcpu)
val = read_sysreg(cpacr_el1);
val |= CPACR_EL1_TTA;
val &= ~CPACR_EL1_ZEN;
if (!update_fp_enabled(vcpu))
if (!update_fp_enabled(vcpu)) {
val &= ~CPACR_EL1_FPEN;
__activate_traps_fpsimd32(vcpu);
}
write_sysreg(val, cpacr_el1);
@ -114,8 +116,10 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu)
val = CPTR_EL2_DEFAULT;
val |= CPTR_EL2_TTA | CPTR_EL2_TZ;
if (!update_fp_enabled(vcpu))
if (!update_fp_enabled(vcpu)) {
val |= CPTR_EL2_TFP;
__activate_traps_fpsimd32(vcpu);
}
write_sysreg(val, cptr_el2);
}
@ -129,7 +133,6 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu)
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN) && (hcr & HCR_VSE))
write_sysreg_s(vcpu->arch.vsesr_el2, SYS_VSESR_EL2);
__activate_traps_fpsimd32(vcpu);
if (has_vhe())
activate_traps_vhe(vcpu);
else

View File

@ -931,7 +931,6 @@ enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu,
bool write);
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);

View File

@ -512,16 +512,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end,
return 1;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
unsigned long end = hva + PAGE_SIZE;
handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
kvm_mips_callbacks->flush_shadow_all(kvm);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL);

View File

@ -358,7 +358,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
unsigned long pp, key;
unsigned long v, orig_v, gr;
__be64 *hptep;
int index;
long int index;
int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
if (kvm_is_radix(vcpu->kvm))

View File

@ -725,10 +725,10 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
gpa, shift);
kvmppc_radix_tlbie_page(kvm, gpa, shift);
if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) {
unsigned long npages = 1;
unsigned long psize = PAGE_SIZE;
if (shift)
npages = 1ul << (shift - PAGE_SHIFT);
kvmppc_update_dirty_map(memslot, gfn, npages);
psize = 1ul << shift;
kvmppc_update_dirty_map(memslot, gfn, psize);
}
}
return 0;

View File

@ -16,7 +16,13 @@ typedef struct {
unsigned long asce;
unsigned long asce_limit;
unsigned long vdso_base;
/* The mmu context allocates 4K page tables. */
/*
* The following bitfields need a down_write on the mm
* semaphore when they are written to. As they are only
* written once, they can be read without a lock.
*
* The mmu context allocates 4K page tables.
*/
unsigned int alloc_pgste:1;
/* The mmu context uses extended page tables. */
unsigned int has_pgste:1;

View File

@ -695,7 +695,9 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
r = -EINVAL;
else {
r = 0;
down_write(&kvm->mm->mmap_sem);
kvm->mm->context.allow_gmap_hpage_1m = 1;
up_write(&kvm->mm->mmap_sem);
/*
* We might have to create fake 4k page
* tables. To avoid that the hardware works on

View File

@ -280,9 +280,11 @@ static int handle_iske(struct kvm_vcpu *vcpu)
goto retry;
}
}
if (rc)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
return rc;
vcpu->run->s.regs.gprs[reg1] &= ~0xff;
vcpu->run->s.regs.gprs[reg1] |= key;
return 0;
@ -324,9 +326,11 @@ static int handle_rrbe(struct kvm_vcpu *vcpu)
goto retry;
}
}
if (rc < 0)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
if (rc < 0)
return rc;
kvm_s390_set_psw_cc(vcpu, rc);
return 0;
}
@ -390,12 +394,12 @@ static int handle_sske(struct kvm_vcpu *vcpu)
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
up_read(&current->mm->mmap_sem);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
if (rc < 0)
return rc;
start += PAGE_SIZE;
}
if (m3 & (SSKE_MC | SSKE_MR)) {
@ -1002,13 +1006,15 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
FAULT_FLAG_WRITE, &unlocked);
rc = !rc ? -EAGAIN : rc;
}
up_read(&current->mm->mmap_sem);
if (rc == -EFAULT)
return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
up_read(&current->mm->mmap_sem);
if (rc >= 0)
start += PAGE_SIZE;
if (rc == -EAGAIN)
continue;
if (rc < 0)
return rc;
}
start += PAGE_SIZE;
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
if (psw_bits(vcpu->arch.sie_block->gpsw).eaba == PSW_BITS_AMODE_64BIT) {

View File

@ -173,7 +173,8 @@ static int shadow_crycb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
return set_validity_icpt(scb_s, 0x0039U);
/* copy only the wrapping keys */
if (read_guest_real(vcpu, crycb_addr + 72, &vsie_page->crycb, 56))
if (read_guest_real(vcpu, crycb_addr + 72,
vsie_page->crycb.dea_wrapping_key_mask, 56))
return set_validity_icpt(scb_s, 0x0035U);
scb_s->ecb3 |= ecb3_flags;

View File

@ -1237,19 +1237,12 @@ enum emulation_result {
#define EMULTYPE_NO_DECODE (1 << 0)
#define EMULTYPE_TRAP_UD (1 << 1)
#define EMULTYPE_SKIP (1 << 2)
#define EMULTYPE_RETRY (1 << 3)
#define EMULTYPE_NO_REEXECUTE (1 << 4)
#define EMULTYPE_NO_UD_ON_FAIL (1 << 5)
#define EMULTYPE_VMWARE (1 << 6)
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
static inline int emulate_instruction(struct kvm_vcpu *vcpu,
int emulation_type)
{
return x86_emulate_instruction(vcpu, 0,
emulation_type | EMULTYPE_NO_REEXECUTE, NULL, 0);
}
#define EMULTYPE_ALLOW_RETRY (1 << 3)
#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
#define EMULTYPE_VMWARE (1 << 5)
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len);
void kvm_enable_efer_bits(u64);
bool kvm_valid_efer(struct kvm_vcpu *vcpu, u64 efer);
@ -1450,7 +1443,6 @@ asmlinkage void kvm_spurious_fault(void);
____kvm_handle_fault_on_reboot(insn, "")
#define KVM_ARCH_WANT_MMU_NOTIFIER
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
@ -1463,7 +1455,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, int min,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
u64 kvm_get_arch_capabilities(void);

View File

@ -548,7 +548,7 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
}
int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, int min,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit)
{
int i;
@ -571,18 +571,31 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
if (min > map->max_apic_id)
goto out;
/* Bits above cluster_size are masked in the caller. */
for_each_set_bit(i, &ipi_bitmap_low, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
for_each_set_bit(i, &ipi_bitmap_low,
min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
if (map->phys_map[min + i]) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}
}
min += cluster_size;
for_each_set_bit(i, &ipi_bitmap_high, BITS_PER_LONG) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
if (min > map->max_apic_id)
goto out;
for_each_set_bit(i, &ipi_bitmap_high,
min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
if (map->phys_map[min + i]) {
vcpu = map->phys_map[min + i]->vcpu;
count += kvm_apic_set_irq(vcpu, &irq, NULL);
}
}
out:
rcu_read_unlock();
return count;
}

View File

@ -1853,11 +1853,6 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
}
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
{
return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
@ -5217,7 +5212,7 @@ static int make_mmu_pages_available(struct kvm_vcpu *vcpu)
int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
void *insn, int insn_len)
{
int r, emulation_type = EMULTYPE_RETRY;
int r, emulation_type = 0;
enum emulation_result er;
bool direct = vcpu->arch.mmu.direct_map;
@ -5230,10 +5225,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
r = RET_PF_INVALID;
if (unlikely(error_code & PFERR_RSVD_MASK)) {
r = handle_mmio_page_fault(vcpu, cr2, direct);
if (r == RET_PF_EMULATE) {
emulation_type = 0;
if (r == RET_PF_EMULATE)
goto emulate;
}
}
if (r == RET_PF_INVALID) {
@ -5260,8 +5253,19 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u64 error_code,
return 1;
}
if (mmio_info_in_cache(vcpu, cr2, direct))
emulation_type = 0;
/*
* vcpu->arch.mmu.page_fault returned RET_PF_EMULATE, but we can still
* optimistically try to just unprotect the page and let the processor
* re-execute the instruction that caused the page fault. Do not allow
* retrying MMIO emulation, as it's not only pointless but could also
* cause us to enter an infinite loop because the processor will keep
* faulting on the non-existent MMIO address. Retrying an instruction
* from a nested guest is also pointless and dangerous as we are only
* explicitly shadowing L1's page tables, i.e. unprotecting something
* for L1 isn't going to magically fix whatever issue cause L2 to fail.
*/
if (!mmio_info_in_cache(vcpu, cr2, direct) && !is_guest_mode(vcpu))
emulation_type = EMULTYPE_ALLOW_RETRY;
emulate:
/*
* On AMD platforms, under certain conditions insn_len may be zero on #NPF.

View File

@ -776,7 +776,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
}
if (!svm->next_rip) {
if (emulate_instruction(vcpu, EMULTYPE_SKIP) !=
if (kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) !=
EMULATE_DONE)
printk(KERN_DEBUG "%s: NOP\n", __func__);
return;
@ -2715,7 +2715,7 @@ static int gp_interception(struct vcpu_svm *svm)
WARN_ON_ONCE(!enable_vmware_backdoor);
er = emulate_instruction(vcpu,
er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@ -2819,7 +2819,7 @@ static int io_interception(struct vcpu_svm *svm)
string = (io_info & SVM_IOIO_STR_MASK) != 0;
in = (io_info & SVM_IOIO_TYPE_MASK) != 0;
if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = io_info >> 16;
size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
@ -3861,7 +3861,7 @@ static int iret_interception(struct vcpu_svm *svm)
static int invlpg_interception(struct vcpu_svm *svm)
{
if (!static_cpu_has(X86_FEATURE_DECODEASSISTS))
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
return kvm_skip_emulated_instruction(&svm->vcpu);
@ -3869,13 +3869,13 @@ static int invlpg_interception(struct vcpu_svm *svm)
static int emulate_on_interception(struct vcpu_svm *svm)
{
return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
}
static int rsm_interception(struct vcpu_svm *svm)
{
return x86_emulate_instruction(&svm->vcpu, 0, 0,
rsm_ins_bytes, 2) == EMULATE_DONE;
return kvm_emulate_instruction_from_buffer(&svm->vcpu,
rsm_ins_bytes, 2) == EMULATE_DONE;
}
static int rdpmc_interception(struct vcpu_svm *svm)
@ -4700,7 +4700,7 @@ static int avic_unaccelerated_access_interception(struct vcpu_svm *svm)
ret = avic_unaccel_trap_write(svm);
} else {
/* Handling Fault */
ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
ret = (kvm_emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE);
}
return ret;
@ -6747,7 +6747,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
unsigned long vaddr, vaddr_end, next_vaddr;
unsigned long dst_vaddr, dst_vaddr_end;
unsigned long dst_vaddr;
struct page **src_p, **dst_p;
struct kvm_sev_dbg debug;
unsigned long n;
@ -6763,7 +6763,6 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
size = debug.len;
vaddr_end = vaddr + size;
dst_vaddr = debug.dst_uaddr;
dst_vaddr_end = dst_vaddr + size;
for (; vaddr < vaddr_end; vaddr = next_vaddr) {
int len, s_off, d_off;

View File

@ -6983,7 +6983,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
* Cause the #SS fault with 0 error code in VM86 mode.
*/
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0) {
if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
return kvm_vcpu_halt(vcpu);
@ -7054,7 +7054,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
if (!vmx->rmode.vm86_active && is_gp_fault(intr_info)) {
WARN_ON_ONCE(!enable_vmware_backdoor);
er = emulate_instruction(vcpu,
er = kvm_emulate_instruction(vcpu,
EMULTYPE_VMWARE | EMULTYPE_NO_UD_ON_FAIL);
if (er == EMULATE_USER_EXIT)
return 0;
@ -7157,7 +7157,7 @@ static int handle_io(struct kvm_vcpu *vcpu)
++vcpu->stat.io_exits;
if (string)
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
port = exit_qualification >> 16;
size = (exit_qualification & 7) + 1;
@ -7231,7 +7231,7 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
static int handle_desc(struct kvm_vcpu *vcpu)
{
WARN_ON(!(vcpu->arch.cr4 & X86_CR4_UMIP));
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_cr(struct kvm_vcpu *vcpu)
@ -7480,7 +7480,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu)
static int handle_invd(struct kvm_vcpu *vcpu)
{
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_invlpg(struct kvm_vcpu *vcpu)
@ -7547,7 +7547,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu)
return kvm_skip_emulated_instruction(vcpu);
}
}
return emulate_instruction(vcpu, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, 0) == EMULATE_DONE;
}
static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu)
@ -7704,8 +7704,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
return kvm_skip_emulated_instruction(vcpu);
else
return x86_emulate_instruction(vcpu, gpa, EMULTYPE_SKIP,
NULL, 0) == EMULATE_DONE;
return kvm_emulate_instruction(vcpu, EMULTYPE_SKIP) ==
EMULATE_DONE;
}
return kvm_mmu_page_fault(vcpu, gpa, PFERR_RSVD_MASK, NULL, 0);
@ -7748,7 +7748,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
err = emulate_instruction(vcpu, 0);
err = kvm_emulate_instruction(vcpu, 0);
if (err == EMULATE_USER_EXIT) {
++vcpu->stat.mmio_exits;
@ -12537,8 +12537,11 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
bool from_vmentry = !!exit_qual;
u32 dummy_exit_qual;
u32 vmcs01_cpu_exec_ctrl;
int r = 0;
vmcs01_cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
enter_guest_mode(vcpu);
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
@ -12574,6 +12577,25 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, u32 *exit_qual)
kvm_make_request(KVM_REQ_GET_VMCS12_PAGES, vcpu);
}
/*
* If L1 had a pending IRQ/NMI until it executed
* VMLAUNCH/VMRESUME which wasn't delivered because it was
* disallowed (e.g. interrupts disabled), L0 needs to
* evaluate if this pending event should cause an exit from L2
* to L1 or delivered directly to L2 (e.g. In case L1 don't
* intercept EXTERNAL_INTERRUPT).
*
* Usually this would be handled by L0 requesting a
* IRQ/NMI window by setting VMCS accordingly. However,
* this setting was done on VMCS01 and now VMCS02 is active
* instead. Thus, we force L0 to perform pending event
* evaluation by requesting a KVM_REQ_EVENT.
*/
if (vmcs01_cpu_exec_ctrl &
(CPU_BASED_VIRTUAL_INTR_PENDING | CPU_BASED_VIRTUAL_NMI_PENDING)) {
kvm_make_request(KVM_REQ_EVENT, vcpu);
}
/*
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@ -13988,9 +14010,6 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
check_vmentry_postreqs(vcpu, vmcs12, &exit_qual))
return -EINVAL;
if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
vmx->nested.nested_run_pending = 1;
vmx->nested.dirty_vmcs12 = true;
ret = enter_vmx_non_root_mode(vcpu, NULL);
if (ret)

View File

@ -4987,7 +4987,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
emul_type = 0;
}
er = emulate_instruction(vcpu, emul_type);
er = kvm_emulate_instruction(vcpu, emul_type);
if (er == EMULATE_USER_EXIT)
return 0;
if (er != EMULATE_DONE)
@ -5870,7 +5870,10 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t cr2,
gpa_t gpa = cr2;
kvm_pfn_t pfn;
if (emulation_type & EMULTYPE_NO_REEXECUTE)
if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
return false;
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (!vcpu->arch.mmu.direct_map) {
@ -5958,7 +5961,10 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
*/
vcpu->arch.last_retry_eip = vcpu->arch.last_retry_addr = 0;
if (!(emulation_type & EMULTYPE_RETRY))
if (!(emulation_type & EMULTYPE_ALLOW_RETRY))
return false;
if (WARN_ON_ONCE(is_guest_mode(vcpu)))
return false;
if (x86_page_table_writing_insn(ctxt))
@ -6276,7 +6282,19 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
return r;
}
EXPORT_SYMBOL_GPL(x86_emulate_instruction);
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type)
{
return x86_emulate_instruction(vcpu, 0, emulation_type, NULL, 0);
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction);
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
void *insn, int insn_len)
{
return x86_emulate_instruction(vcpu, 0, 0, insn, insn_len);
}
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned short port)
@ -7734,7 +7752,7 @@ static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
{
int r;
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
r = kvm_emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
if (r != EMULATE_DONE)
return 0;

View File

@ -274,6 +274,8 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
int page_num);
bool kvm_vector_hashing_enabled(void);
int x86_emulate_instruction(struct kvm_vcpu *vcpu, unsigned long cr2,
int emulation_type, void *insn, int insn_len);
#define KVM_SUPPORTED_XCR0 (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
| XFEATURE_MASK_YMM | XFEATURE_MASK_BNDREGS \

View File

@ -759,12 +759,18 @@ class DebugfsProvider(Provider):
if len(vms) == 0:
self.do_read = False
self.paths = filter(lambda x: "{}-".format(pid) in x, vms)
self.paths = list(filter(lambda x: "{}-".format(pid) in x, vms))
else:
self.paths = []
self.do_read = True
self.reset()
def _verify_paths(self):
"""Remove invalid paths"""
for path in self.paths:
if not os.path.exists(os.path.join(PATH_DEBUGFS_KVM, path)):
self.paths.remove(path)
continue
def read(self, reset=0, by_guest=0):
"""Returns a dict with format:'file name / field -> current value'.
@ -780,6 +786,7 @@ class DebugfsProvider(Provider):
# If no debugfs filtering support is available, then don't read.
if not self.do_read:
return results
self._verify_paths()
paths = self.paths
if self._pid == 0:
@ -1096,15 +1103,16 @@ class Tui(object):
pid = self.stats.pid_filter
self.screen.erase()
gname = self.get_gname_from_pid(pid)
self._gname = gname
if gname:
gname = ('({})'.format(gname[:MAX_GUEST_NAME_LEN] + '...'
if len(gname) > MAX_GUEST_NAME_LEN
else gname))
if pid > 0:
self.screen.addstr(0, 0, 'kvm statistics - pid {0} {1}'
.format(pid, gname), curses.A_BOLD)
self._headline = 'kvm statistics - pid {0} {1}'.format(pid, gname)
else:
self.screen.addstr(0, 0, 'kvm statistics - summary', curses.A_BOLD)
self._headline = 'kvm statistics - summary'
self.screen.addstr(0, 0, self._headline, curses.A_BOLD)
if self.stats.fields_filter:
regex = self.stats.fields_filter
if len(regex) > MAX_REGEX_LEN:
@ -1162,6 +1170,19 @@ class Tui(object):
return sorted_items
if not self._is_running_guest(self.stats.pid_filter):
if self._gname:
try: # ...to identify the guest by name in case it's back
pids = self.get_pid_from_gname(self._gname)
if len(pids) == 1:
self._refresh_header(pids[0])
self._update_pid(pids[0])
return
except:
pass
self._display_guest_dead()
# leave final data on screen
return
row = 3
self.screen.move(row, 0)
self.screen.clrtobot()
@ -1184,6 +1205,7 @@ class Tui(object):
# print events
tavg = 0
tcur = 0
guest_removed = False
for key, values in get_sorted_events(self, stats):
if row >= self.screen.getmaxyx()[0] - 1 or values == (0, 0):
break
@ -1191,7 +1213,10 @@ class Tui(object):
key = self.get_gname_from_pid(key)
if not key:
continue
cur = int(round(values.delta / sleeptime)) if values.delta else ''
cur = int(round(values.delta / sleeptime)) if values.delta else 0
if cur < 0:
guest_removed = True
continue
if key[0] != ' ':
if values.delta:
tcur += values.delta
@ -1204,13 +1229,21 @@ class Tui(object):
values.value * 100 / float(ltotal), cur))
row += 1
if row == 3:
self.screen.addstr(4, 1, 'No matching events reported yet')
if guest_removed:
self.screen.addstr(4, 1, 'Guest removed, updating...')
else:
self.screen.addstr(4, 1, 'No matching events reported yet')
if row > 4:
tavg = int(round(tcur / sleeptime)) if tcur > 0 else ''
self.screen.addstr(row, 1, '%-40s %10d %8s' %
('Total', total, tavg), curses.A_BOLD)
self.screen.refresh()
def _display_guest_dead(self):
marker = ' Guest is DEAD '
y = min(len(self._headline), 80 - len(marker))
self.screen.addstr(0, y, marker, curses.A_BLINK | curses.A_STANDOUT)
def _show_msg(self, text):
"""Display message centered text and exit on key press"""
hint = 'Press any key to continue'
@ -1219,10 +1252,10 @@ class Tui(object):
(x, term_width) = self.screen.getmaxyx()
row = 2
for line in text:
start = (term_width - len(line)) / 2
start = (term_width - len(line)) // 2
self.screen.addstr(row, start, line)
row += 1
self.screen.addstr(row + 1, (term_width - len(hint)) / 2, hint,
self.screen.addstr(row + 1, (term_width - len(hint)) // 2, hint,
curses.A_STANDOUT)
self.screen.getkey()
@ -1319,6 +1352,12 @@ class Tui(object):
msg = '"' + str(val) + '": Invalid value'
self._refresh_header()
def _is_running_guest(self, pid):
"""Check if pid is still a running process."""
if not pid:
return True
return os.path.isdir(os.path.join('/proc/', str(pid)))
def _show_vm_selection_by_guest(self):
"""Draws guest selection mask.
@ -1346,7 +1385,7 @@ class Tui(object):
if not guest or guest == '0':
break
if guest.isdigit():
if not os.path.isdir(os.path.join('/proc/', guest)):
if not self._is_running_guest(guest):
msg = '"' + guest + '": Not a running process'
continue
pid = int(guest)

View File

@ -1817,18 +1817,6 @@ static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *dat
return 0;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
{
unsigned long end = hva + PAGE_SIZE;
if (!kvm->arch.pgd)
return 0;
trace_kvm_unmap_hva(hva);
handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL);
return 0;
}
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end)
{
@ -1860,13 +1848,20 @@ static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
{
unsigned long end = hva + PAGE_SIZE;
kvm_pfn_t pfn = pte_pfn(pte);
pte_t stage2_pte;
if (!kvm->arch.pgd)
return;
trace_kvm_set_spte_hva(hva);
stage2_pte = pfn_pte(pte_pfn(pte), PAGE_S2);
/*
* We've moved a page around, probably through CoW, so let's treat it
* just like a translation fault and clean the cache to the PoC.
*/
clean_dcache_guest_page(pfn, PAGE_SIZE);
stage2_pte = pfn_pte(pfn, PAGE_S2);
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}

View File

@ -134,21 +134,6 @@ TRACE_EVENT(kvm_mmio_emulate,
__entry->vcpu_pc, __entry->instr, __entry->cpsr)
);
TRACE_EVENT(kvm_unmap_hva,
TP_PROTO(unsigned long hva),
TP_ARGS(hva),
TP_STRUCT__entry(
__field( unsigned long, hva )
),
TP_fast_assign(
__entry->hva = hva;
),
TP_printk("mmu notifier unmap hva: %#08lx", __entry->hva)
);
TRACE_EVENT(kvm_unmap_hva_range,
TP_PROTO(unsigned long start, unsigned long end),
TP_ARGS(start, end),