hv: hide mwait from guest.

When the CPU supports MONITOR/MWAIT, the OS prefers to use it to enter
deeper C-states.

Currently ACRN passes MONITOR/MWAIT through to the guest.

For vCPUs (e.g. vCPU A and vCPU B) that share a pCPU, if vCPU A uses MWAIT to enter
a C-state, vCPU B can run only after the time slice of vCPU A has expired. The
remainder of vCPU A's time slice is wasted.

For Local APIC pass-through (used for RTVM), the guest cares more about
timeliness than power saving.

So this patch hides MONITOR/MWAIT by:
    1. Clear vCPUID.05H, vCPUID.01H:ECX.[bit 3] and
    MSR_IA32_MISC_ENABLE_MONITOR_ENA to tell the guest that its vCPU
    does not support MONITOR/MWAIT.
    2. Inject #GP when the guest tries to set the
    MSR_IA32_MISC_ENABLE_MONITOR_ENA bit of MSR_IA32_MISC_ENABLE.
    3. Trap instruction 'MONITOR' and 'MWAIT' and inject 'UD'.
    4. Clear vCPUID.07H:ECX.[bit 5] to hide 'UMONITOR/UMWAIT'.
    5. Clear the "enable user wait and pause" VM-execution control, so
    UMONITOR/UMWAIT cause a #UD.

Tracked-On: #8253
Signed-off-by: Yuanyuan Zhao <yuanyuan.zhao@linux.intel.com>
Reviewed-by: Fei Li <fei1.li@intel.com>
This commit is contained in:
Yuanyuan Zhao 2022-07-11 10:07:50 +08:00 committed by acrnsi-robot
parent 4083da9470
commit 0a4c76357e
7 changed files with 38 additions and 34 deletions

View File

@ -545,6 +545,9 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
}
}
break;
/* MONITOR/MWAIT */
case 0x05U:
break;
case 0x07U:
init_vcpuid_entry(i, 0U, CPUID_CHECK_SUBLEAF, &entry);
if (entry.eax != 0U) {
@ -554,6 +557,7 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
if (is_vsgx_supported(vm->vm_id)) {
entry.ebx |= CPUID_EBX_SGX;
}
entry.ecx &= ~CPUID_ECX_WAITPKG;
#ifdef CONFIG_VCAT_ENABLED
if (is_vcat_configured(vm)) {
@ -618,7 +622,6 @@ int32_t set_vcpuid_entries(struct acrn_vm *vm)
static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
uint32_t apicid = vlapic_get_apicid(vcpu_vlapic(vcpu));
uint64_t guest_ia32_misc_enable = vcpu_get_guest_msr(vcpu, MSR_IA32_MISC_ENABLE);
uint64_t cr4_reserved_mask = get_cr4_reserved_bits();
cpuid_subleaf(0x1U, 0x0U, eax, ebx, ecx, edx);
@ -651,15 +654,10 @@ static void guest_cpuid_01h(struct acrn_vcpu *vcpu, uint32_t *eax, uint32_t *ebx
*ecx &= ~CPUID_ECX_PCID;
}
/* guest monitor/mwait is supported only if it is allowed('vm_mwait_cap' is true)
* and MSR_IA32_MISC_ENABLE_MONITOR_ENA bit of guest MSR_IA32_MISC_ENABLE is set,
* else clear cpuid.01h[3].
/*
* Hide MONITOR/MWAIT.
*/
*ecx &= ~CPUID_ECX_MONITOR;
if (vcpu->vm->arch_vm.vm_mwait_cap &&
((guest_ia32_misc_enable & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL)) {
*ecx |= CPUID_ECX_MONITOR;
}
*ecx &= ~CPUID_ECX_OSXSAVE;
if ((*ecx & CPUID_ECX_XSAVE) != 0U) {

View File

@ -738,7 +738,6 @@ int32_t create_vm(uint16_t vm_id, uint64_t pcpu_bitmap, struct acrn_vm_config *v
spinlock_init(&vm->arch_vm.iwkey_backup_lock);
vm->arch_vm.vlapic_mode = VM_VLAPIC_XAPIC;
vm->arch_vm.vm_mwait_cap = has_monitor_cap();
vm->intr_inject_delay_delta = 0UL;
vm->nr_emul_mmio_regions = 0U;
vm->vcpuid_entry_nr = 0U;

View File

@ -53,7 +53,8 @@ static void init_guest_vmx(struct acrn_vcpu *vcpu, uint64_t cr0, uint64_t cr3,
load_segment(ectx->ldtr, VMX_GUEST_LDTR);
/* init guest ia32_misc_enable value for guest read */
vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE, msr_read(MSR_IA32_MISC_ENABLE));
vcpu_set_guest_msr(vcpu, MSR_IA32_MISC_ENABLE,
(msr_read(MSR_IA32_MISC_ENABLE) & (~MSR_IA32_MISC_ENABLE_MONITOR_ENA)));
vcpu_set_guest_msr(vcpu, MSR_IA32_PERF_CTL, msr_read(MSR_IA32_PERF_CTL));
@ -313,6 +314,11 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 |= VMX_PROCBASED_CTLS_RDPMC;
}
/*
* Make MONITOR/MWAIT executed by the guest cause a VM exit.
*/
value32 |= VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_MONITOR;
vcpu->arch.proc_vm_exec_ctrls = value32;
exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS, value32);
pr_dbg("VMX_PROC_VM_EXEC_CONTROLS: 0x%x ", value32);
@ -335,6 +341,9 @@ static void init_exec_ctrl(struct acrn_vcpu *vcpu)
value32 &= ~VMX_PROCBASED_CTLS2_INVPCID;
}
/* Make TPAUSE and UMONITOR/UMWAIT cause a #UD. */
value32 &= ~VMX_PROCBASED_CTLS2_UWAIT_PAUSE;
if (is_apicv_advanced_feature_supported()) {
value32 |= VMX_PROCBASED_CTLS2_VIRQ;
value32 |= VMX_PROCBASED_CTLS2_VAPIC_REGS;

View File

@ -40,6 +40,7 @@ static int32_t hlt_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t mtf_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t loadiwkey_vmexit_handler(struct acrn_vcpu *vcpu);
static int32_t init_signal_vmexit_handler(__unused struct acrn_vcpu *vcpu);
static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu);
/* VM Dispatch table for Exit condition handling */
static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
@ -151,11 +152,11 @@ static const struct vm_exit_dispatch dispatch_table[NR_VMX_EXIT_REASONS] = {
[VMX_EXIT_REASON_ENTRY_FAILURE_MSR_LOADING] = {
.handler = unhandled_vmexit_handler},
[VMX_EXIT_REASON_MWAIT] = {
.handler = unhandled_vmexit_handler},
.handler = mwait_monitor_vmexit_handler},
[VMX_EXIT_REASON_MONITOR_TRAP] = {
.handler = mtf_vmexit_handler},
[VMX_EXIT_REASON_MONITOR] = {
.handler = unhandled_vmexit_handler},
.handler = mwait_monitor_vmexit_handler},
[VMX_EXIT_REASON_PAUSE] = {
.handler = pause_vmexit_handler},
[VMX_EXIT_REASON_ENTRY_FAILURE_MACHINE_CHECK] = {
@ -287,6 +288,16 @@ int32_t vmexit_handler(struct acrn_vcpu *vcpu)
return ret;
}
/*
 * VM-exit handler registered in dispatch_table for both
 * VMX_EXIT_REASON_MWAIT and VMX_EXIT_REASON_MONITOR.
 *
 * MONITOR/MWAIT are hidden from the guest, so a guest that executes either
 * instruction is answered with #UD: the handler logs the guest RIP read from
 * the VMCS and then calls vcpu_inject_ud() on the exiting vCPU.
 *
 * Always returns 0.
 */
static int32_t mwait_monitor_vmexit_handler (struct acrn_vcpu *vcpu)
{
pr_fatal("Error: Unsupported mwait option from guest at 0x%016lx ",
exec_vmread(VMX_GUEST_RIP));
vcpu_inject_ud(vcpu);
return 0;
}
static int32_t unhandled_vmexit_handler(struct acrn_vcpu *vcpu)
{
pr_fatal("Error: Unhandled VM exit condition from guest at 0x%016lx ",

View File

@ -934,7 +934,6 @@ static void set_guest_tsc_adjust(struct acrn_vcpu *vcpu, uint64_t tsc_adjust)
*/
static void set_guest_ia32_misc_enalbe(struct acrn_vcpu *vcpu, uint64_t v)
{
uint32_t eax, ebx = 0U, ecx = 0U, edx = 0U;
bool update_vmsr = true;
uint64_t effective_guest_msr = v;
@ -945,26 +944,12 @@ static void set_guest_ia32_misc_enalbe(struct acrn_vcpu *vcpu, uint64_t v)
vcpu_set_efer(vcpu, vcpu_get_efer(vcpu) & ~MSR_IA32_EFER_NXE_BIT);
}
/* Handle MISC_ENABLE_MONITOR_ENA
* - if guest try to set this bit, do nothing.
* - if guest try to clear this bit, MISC_ENABLE_MONITOR_ENA bit of guest MSR_IA32_MISC_ENABLE
* shall be cleared.
/* MONITOR/MWAIT is hidden from the guest, so
* MISC_ENABLE_MONITOR_ENA must not be set.
*/
if (((v ^ vcpu_get_guest_msr(vcpu, MSR_IA32_MISC_ENABLE)) & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL) {
eax = 1U;
guest_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
/* According to SDM Vol4 2.1 Table 2-2,
* Writing this bit when the SSE3 feature flag is set to 0 may generate a #GP exception.
*/
if ((ecx & CPUID_ECX_SSE3) == 0U) {
vcpu_inject_gp(vcpu, 0U);
update_vmsr = false;
} else if (vcpu->vm->arch_vm.vm_mwait_cap) {
/* guest cpuid.01H will be updated when guest executes 'cpuid' with leaf 01H */
effective_guest_msr &= ~MSR_IA32_MISC_ENABLE_MONITOR_ENA;
} else {
update_vmsr = false;
}
if ((v & MSR_IA32_MISC_ENABLE_MONITOR_ENA) != 0UL) {
vcpu_inject_gp(vcpu, 0U);
update_vmsr = false;
}
if (update_vmsr) {

View File

@ -42,6 +42,8 @@
#define CPUID_ECX_OSXSAVE (1U<<27U)
#define CPUID_ECX_AVX (1U<<28U)
#define CPUID_ECX_HV (1U<<31U)
#define CPUID_ECX_MWAIT (1U<<0U)
#define CPUID_ECX_MWAIT_INT (1U<<1U)
#define CPUID_EDX_FPU (1U<<0U)
#define CPUID_EDX_VME (1U<<1U)
#define CPUID_EDX_DE (1U<<2U)
@ -100,6 +102,8 @@
#define CPUID_ECX_UMIP (1U<<2U)
/* CPUID.07H:ECX.PKE */
#define CPUID_ECX_PKE (1U<<3U)
/* CPUID.07H:ECX.WAITPKG */
#define CPUID_ECX_WAITPKG (1U<<5U)
/* CPUID.07H:ECX.CET_SS */
#define CPUID_ECX_CET_SS (1U<<7U)
/* CPUID.07H:ECX.LA57 */

View File

@ -131,8 +131,6 @@ struct vm_arch {
spinlock_t iwkey_backup_lock; /* Spin-lock used to protect internal key backup/restore */
struct iwkey iwkey_backup;
/* reference to virtual platform to come here (as needed) */
bool vm_mwait_cap;
} __aligned(PAGE_SIZE);
struct acrn_vm {