From 0a4c76357ef98eb75c8c357b1ca1ae8527144d4e Mon Sep 17 00:00:00 2001 From: Yuanyuan Zhao Date: Mon, 11 Jul 2022 10:07:50 +0800 Subject: [PATCH] hv: hide mwait from guest. When the CPU supports MONITOR/MWAIT, the OS prefers to use it to enter deeper C-states. Currently ACRN passes MONITOR/MWAIT through to the guest. For vCPUs (e.g. vCPU A and vCPU B) that share a pCPU, if vCPU A uses MWAIT to enter a C-state, vCPU B can run only after the time slice of vCPU A has expired, so that time slice of vCPU A is wasted. For Local APIC pass-through (used for RTVM), the guest cares more about timeliness than about power saving. So this patch hides MONITOR/MWAIT by: 1. Clearing vCPUID.05H, vCPUID.01H:ECX.[bit 3] and MSR_IA32_MISC_ENABLE_MONITOR_ENA to tell the guest that the VM's vCPU does not support MONITOR/MWAIT. 2. Injecting a 'GP' when the guest tries to set the MSR_IA32_MISC_ENABLE_MONITOR_ENA bit of MSR_IA32_MISC_ENABLE. 3. Trapping the 'MONITOR' and 'MWAIT' instructions and injecting a 'UD'. 4. Clearing vCPUID.07H:ECX.[bit 5] to hide 'UMONITOR/UMWAIT'. 5. Clearing the "enable user wait and pause" VM-execution control, so that UMONITOR/UMWAIT causes a 'UD'. 
Tracked-On: #8253 Signed-off-by: Yuanyuan Zhao Reviewed-by: Fei Li --- hypervisor/arch/x86/guest/vcpuid.c | 14 ++++++------ hypervisor/arch/x86/guest/vm.c | 1 - hypervisor/arch/x86/guest/vmcs.c | 11 +++++++++- hypervisor/arch/x86/guest/vmexit.c | 15 +++++++++++-- hypervisor/arch/x86/guest/vmsr.c | 25 +++++----------------- hypervisor/include/arch/x86/asm/cpuid.h | 4 ++++ hypervisor/include/arch/x86/asm/guest/vm.h | 2 -- 7 files changed, 38 insertions(+), 34 deletions(-) diff --git a/hypervisor/arch/x86/guest/vcpuid.c b/hypervisor/arch/x86/guest/vcpuid.c index f5c3c5867..11acd8647 100644 --- a/hypervisor/arch/x86/guest/vcpuid.c +++ b/hypervisor/arch/x86/guest/vcpuid.c @@ -545,6 +545,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm) } } break; + /* MONITOR/MWAIT */ + case 0x05U: + break; case 0x07U: init_vcpuid_entry(i, 0U, CPUID_CHECK_SUBLEAF, &entry); if (entry.eax != 0U) { @@ -554,6 +557,7 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm) if (is_vsgx_supported(vm->vm_id)) { entry.ebx |= CPUID_EBX_SGX; } + entry.ecx &= ~CPUID_ECX_WAITPKG; #ifdef CONFIG_VCAT_ENABLED if (is_vcat_configured(vm)) { @@ -618,7 +622,6 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm) static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { uint32_t apicid = vlapic_get_apicid(vcpu_vlapic(vcpu)); - uint64_t guest_ia32_misc_enable = vcpu_get_guest_msr(vcpu, MSR_IA32_MISC_ENABLE); uint64_t cr4_reserved_mask = get_cr4_reserved_bits(); cpuid_subleaf(0x1U, 0x0U, eax, ebx, ecx, edx); @@ -651,15 +654,10 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx *ecx &= ~CPUID_ECX_PCID; } - /* guest monitor/mwait is supported only if it is allowed('vm_mwait_cap' is true) - * and MSR_IA32_MISC_ENABLE_MONITOR_ENA bit of guest MSR_IA32_MISC_ENABLE is set, - * else clear cpuid.01h[3]. + /* + * Hide MONITOR/MWAIT. 
*/ *ecx &= ~CPUID_ECX_MONITOR; - if (vcpu->vm->arch_vm.vm_mwait_cap && - ((guest_ia32_misc_enable & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL)) { - *ecx |= CPUID_ECX_MONITOR; - } *ecx &= ~CPUID_ECX_OSXSAVE; if ((*ecx & CPUID_ECX_XSAVE) != 0U) { diff --git a/hypervisor/arch/x86/guest/vm.c b/hypervisor/arch/x86/guest/vm.c index 3d8e0c3a9..f197275bc 100644 --- a/hypervisor/arch/x86/guest/vm.c +++ b/hypervisor/arch/x86/guest/vm.c @@ -738,7 +738,6 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v spinlock_init(&vm->arch_vm.iwkey_backup_lock); vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC; - vm->arch_vm.vm_mwait_cap = has_monitor_cap(); vm->intr_inject_delay_delta = 0UL; vm->nr_emul_mmio_regions = 0U; vm->vcpuid_entry_nr = 0U; diff --git a/hypervisor/arch/x86/guest/vmcs.c b/hypervisor/arch/x86/guest/vmcs.c index bc6268129..689e4a846 100644 --- a/hypervisor/arch/x86/guest/vmcs.c +++ b/hypervisor/arch/x86/guest/vmcs.c @@ -53,7 +53,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3, load_segment(ectx->ldtr, VMX_GUEST_LDTR); /* init guest ia32_misc_enable value for guest read */ - vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE)); + vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, + (msr_read(MSR_IA32_MISC_ENABLE) & (~MSR_IA32_MISC_ENABLE_MONITOR_ENA))); vcpu_set_guest_msr(vcpu, MSR_IA32_PERF_CTL, msr_read(MSR_IA32_PERF_CTL)); @@ -313,6 +314,11 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu) value32 |= VMX_PROCBASED_CTLS_RDPMC; } + /* + * Enable MONITOR/MWAIT cause a VM-EXIT. + */ + value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR; + vcpu->arch.proc_vm_exec_ctrls = value32; exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32); pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32); @@ -335,6 +341,9 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu) value32 &= ~VMX_PROCBASED_CTLS2_INVPCID; } + /* Enable TPAUSE, UMONITOR/UWAIT cause a #UD. 
*/ + value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE; + if (is_apicv_advanced_feature_supported()) { value32 |= VMX_PROCBASED_CTLS2_VIRQ; value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS; diff --git a/hypervisor/arch/x86/guest/vmexit.c b/hypervisor/arch/x86/guest/vmexit.c index 2888278aa..ed48c5736 100644 --- a/hypervisor/arch/x86/guest/vmexit.c +++ b/hypervisor/arch/x86/guest/vmexit.c @@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu); static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu); +static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu); /* VM Dispatch table for Exit condition handling */ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { @@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = { [VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = { .handler = unhandled_vmexit_handler}, [VMX_EXIT_REASON_MWAIT] = { - .handler = unhandled_vmexit_handler}, + .handler = mwait_monitor_vmexit_handler}, [VMX_EXIT_REASON_MONITOR_TRAP] = { .handler = mtf_vmexit_handler}, [VMX_EXIT_REASON_MONITOR] = { - .handler = unhandled_vmexit_handler}, + .handler = mwait_monitor_vmexit_handler}, [VMX_EXIT_REASON_PAUSE] = { .handler = pause_vmexit_handler}, [VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = { @@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu) return ret; } +static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu) +{ + pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ", + exec_vmread(VMX_GUEST_RIP)); + + vcpu_inject_ud(vcpu); + + return 0; +} + static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu) { pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ", diff --git a/hypervisor/arch/x86/guest/vmsr.c b/hypervisor/arch/x86/guest/vmsr.c index 840e1dfcb..ba5caea2b 100644 
--- a/hypervisor/arch/x86/guest/vmsr.c +++ b/hypervisor/arch/x86/guest/vmsr.c @@ -934,7 +934,6 @@ static void set_guest_tsc_adjust(struct acrn_vcpu *vcpu, uint64_t tsc_adjust) */ static void set_guest_ia32_misc_enalbe(struct acrn_vcpu *vcpu, uint64_t v) { - uint32_t eax, ebx = 0U, ecx = 0U, edx = 0U; bool update_vmsr = true; uint64_t effective_guest_msr = v; @@ -945,26 +944,12 @@ static void set_guest_ia32_misc_enalbe(struct acrn_vcpu *vcpu, uint64_t v) vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) & ~MSR_IA32_EFER_NXE_BIT); } - /* Handle MISC_ENABLE_MONITOR_ENA - * - if guest try to set this bit, do nothing. - * - if guest try to clear this bit, MISC_ENABLE_MONITOR_ENA bit of guest MSR_IA32_MISC_ENABLE - * shall be cleared. + /* MONITOR/MWAIT is hide. + * MISC_ENABLE_MONITOR_ENA should not be set. */ - if (((v ^ vcpu_get_guest_msr(vcpu, MSR_IA32_MISC_ENABLE)) & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL) { - eax = 1U; - guest_cpuid(vcpu, &eax, &ebx, &ecx, &edx); - /* According to SDM Vol4 2.1 Table 2-2, - * Writing this bit when the SSE3 feature flag is set to 0 may generate a #GP exception. 
- */ - if ((ecx & CPUID_ECX_SSE3) == 0U) { - vcpu_inject_gp(vcpu, 0U); - update_vmsr = false; - } else if (vcpu->vm->arch_vm.vm_mwait_cap) { - /* guest cpuid.01H will be updated when guest executes 'cpuid' with leaf 01H */ - effective_guest_msr &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA; - } else { - update_vmsr = false; - } + if ((v & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL) { + vcpu_inject_gp(vcpu, 0U); + update_vmsr = false; } if (update_vmsr) { diff --git a/hypervisor/include/arch/x86/asm/cpuid.h b/hypervisor/include/arch/x86/asm/cpuid.h index 7f41a10c2..e178e3305 100644 --- a/hypervisor/include/arch/x86/asm/cpuid.h +++ b/hypervisor/include/arch/x86/asm/cpuid.h @@ -42,6 +42,8 @@ #define CPUID_ECX_OSXSAVE (1U<<27U) #define CPUID_ECX_AVX (1U<<28U) #define CPUID_ECX_HV (1U<<31U) +#define CPUID_ECX_MWAIT (1U<<0U) +#define CPUID_ECX_MWAIT_INT (1U<<1U) #define CPUID_EDX_FPU (1U<<0U) #define CPUID_EDX_VME (1U<<1U) #define CPUID_EDX_DE (1U<<2U) @@ -100,6 +102,8 @@ #define CPUID_ECX_UMIP (1U<<2U) /* CPUID.07H:ECX.PKE */ #define CPUID_ECX_PKE (1U<<3U) +/* CPUID.07H:ECX.WAITPKG */ +#define CPUID_ECX_WAITPKG (1U<<5U) /* CPUID.07H:ECX.CET_SS */ #define CPUID_ECX_CET_SS (1U<<7U) /* CPUID.07H:ECX.LA57 */ diff --git a/hypervisor/include/arch/x86/asm/guest/vm.h b/hypervisor/include/arch/x86/asm/guest/vm.h index 62b606be1..d26869bb9 100644 --- a/hypervisor/include/arch/x86/asm/guest/vm.h +++ b/hypervisor/include/arch/x86/asm/guest/vm.h @@ -131,8 +131,6 @@ struct vm_arch { spinlock_t iwkey_backup_lock; /* Spin-lock used to protect internal key backup/restore */ struct iwkey iwkey_backup; - /* reference to virtual platform to come here (as needed) */ - bool vm_mwait_cap; } __aligned(PAGE_SIZE); struct acrn_vm {