From 316731c9a58990d2e0dfc89903a68ac65429d3e4 Mon Sep 17 00:00:00 2001 From: Yonghua Huang Date: Thu, 29 Mar 2018 01:31:19 +0800 Subject: [PATCH] hv: pass-through xsave feature to guests enable Xsave feature and pass-through it to guests update based on v2: - enable host xsave before expose it to guests. - add validation for the value to be set to 'xcr0' before call xsetbv when handling xsetbv vmexit. - tested in SOS guest, created two threads to do different FP calculations,test code runs in user land of sos. Signed-off-by: Yonghua Huang Acked-by: Eddie Dong --- hypervisor/arch/x86/cpu.c | 28 +++++++++++++++ hypervisor/arch/x86/cpuid.c | 24 ++++++++++++- hypervisor/arch/x86/vmexit.c | 60 +++++++++++++++++++++++-------- hypervisor/arch/x86/vmx.c | 5 +++ hypervisor/include/arch/x86/cpu.h | 10 ++++++ hypervisor/include/arch/x86/vmx.h | 2 ++ 6 files changed, 114 insertions(+), 15 deletions(-) diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index b0e4049f8..1bfed18db 100644 --- a/hypervisor/arch/x86/cpu.c +++ b/hypervisor/arch/x86/cpu.c @@ -80,6 +80,7 @@ static struct cpu_capability cpu_caps; struct cpuinfo_x86 boot_cpu_data; static void vapic_cap_detect(void); +static void cpu_xsave_init(void); static void cpu_set_logical_id(uint32_t logical_id); static void print_hv_banner(void); int cpu_find_logical_id(uint32_t lapic_id); @@ -364,6 +365,8 @@ void bsp_boot_init(void) vapic_cap_detect(); + cpu_xsave_init(); + /* Set state for this CPU to initializing */ cpu_set_current_state(CPU_BOOT_ID, CPU_STATE_INITIALIZING); @@ -490,6 +493,8 @@ void cpu_secondary_init(void) pr_dbg("Core %d is up", get_cpu_id()); + cpu_xsave_init(); + /* Release secondary boot spin-lock to allow one of the next CPU(s) to * perform this common initialization */ @@ -699,3 +704,26 @@ bool is_vapic_virt_reg_supported(void) { return ((cpu_caps.vapic_features & VAPIC_FEATURE_VIRT_REG) != 0); } + +bool is_xsave_supported(void) +{ + /* + *todo: + *below flag also should be tested, but 
current it will be false + *as it is not updated after turning on the host's CR4.OSXSAVE bit, + *will be fixed in cpuid related patch. + *boot_cpu_data.cpuid_leaves[FEAT_1_ECX] & CPUID_ECX_OSXSAVE + **/ + return !!(boot_cpu_data.cpuid_leaves[FEAT_1_ECX] & CPUID_ECX_XSAVE); +} + +static void cpu_xsave_init(void) +{ + uint64_t val64; + + if (is_xsave_supported()) { + CPU_CR_READ(cr4, &val64); + val64 |= CR4_OSXSAVE; + CPU_CR_WRITE(cr4, val64); + } +} diff --git a/hypervisor/arch/x86/cpuid.c b/hypervisor/arch/x86/cpuid.c index 7271d46cd..e9563296e 100644 --- a/hypervisor/arch/x86/cpuid.c +++ b/hypervisor/arch/x86/cpuid.c @@ -285,7 +285,7 @@ void guest_cpuid(struct vcpu *vcpu, uint32_t subleaf = *ecx; /* vm related */ - if (leaf != 0x1 && leaf != 0xb) { + if (leaf != 0x1 && leaf != 0xb && leaf != 0xd) { struct vcpuid_entry *entry = find_vcpuid_entry(vcpu, leaf, subleaf); @@ -329,6 +329,18 @@ void guest_cpuid(struct vcpu *vcpu, /*mask vmx to guest os */ *ecx &= ~CPUID_ECX_VMX; + /*no xsave support for guest if it is not enabled on host*/ + if (!(*ecx & CPUID_ECX_OSXSAVE)) + *ecx &= ~CPUID_ECX_XSAVE; + + *ecx &= ~CPUID_ECX_OSXSAVE; + if (*ecx & CPUID_ECX_XSAVE) { + uint64_t cr4; + /*read guest CR4*/ + cr4 = exec_vmread(VMX_GUEST_CR4); + if (cr4 & CR4_OSXSAVE) + *ecx |= CPUID_ECX_OSXSAVE; + } break; } @@ -343,6 +355,16 @@ void guest_cpuid(struct vcpu *vcpu, cpuid_subleaf(leaf, subleaf, eax, ebx, ecx, edx); break; + case 0x0d: + if (!is_xsave_supported()) { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } else + cpuid_subleaf(leaf, subleaf, eax, ebx, ecx, edx); + break; + default: break; } diff --git a/hypervisor/arch/x86/vmexit.c b/hypervisor/arch/x86/vmexit.c index a90326ca3..006497bcd 100644 --- a/hypervisor/arch/x86/vmexit.c +++ b/hypervisor/arch/x86/vmexit.c @@ -37,6 +37,7 @@ static int rdtscp_handler(struct vcpu *vcpu); static int unhandled_vmexit_handler(struct vcpu *vcpu); static int rdtsc_handler(struct vcpu *vcpu); +static int xsetbv_vmexit_handler(struct vcpu 
*vcpu); /* VM Dispatch table for Exit condition handling */ static const struct vm_exit_dispatch dispatch_table[] = { [VMX_EXIT_REASON_EXCEPTION_OR_NMI] = { @@ -151,7 +152,7 @@ static const struct vm_exit_dispatch dispatch_table[] = { [VMX_EXIT_REASON_WBINVD] = { .handler = unhandled_vmexit_handler}, [VMX_EXIT_REASON_XSETBV] = { - .handler = unhandled_vmexit_handler}, + .handler = xsetbv_vmexit_handler}, [VMX_EXIT_REASON_APIC_WRITE] = { .handler = apic_write_vmexit_handler, .need_exit_qualification = 1} @@ -418,26 +419,57 @@ int invlpg_handler(__unused struct vcpu *vcpu) return 0; } +#endif /* - * XSETBV instruction set's the XCR0 that is used to tell for which components - * states can be saved on a context switch using xsave. - * - * We don't handle this right now because we are on a platform that does not - * support XSAVE/XRSTORE feature as reflected by the instruction CPUID. - * - * to make sure this never get called until we support it we can prevent the - * reading of this bit in CPUID VMEXIT. - * - * Linux checks this in CPUID: cpufeature.h: #define cpu_has_xsave + * The XSETBV instruction sets XCR0, which selects the state components + * that can be saved on a context switch using xsave. 
*/ -static int xsetbv_instr_handler(__unused struct vcpu *vcpu) +static int xsetbv_vmexit_handler(struct vcpu *vcpu) { - ASSERT("Not Supported" == 0, "XSETBV executed"); + int idx; + uint64_t val64; + struct run_context *ctx_ptr; + val64 = exec_vmread(VMX_GUEST_CR4); + if (!(val64 & CR4_OSXSAVE)) { + vcpu_inject_gp(vcpu); + return -1; + } + + idx = vcpu->arch_vcpu.cur_context; + if (idx >= NR_WORLD) + return -1; + + ctx_ptr = &(vcpu->arch_vcpu.contexts[idx]); + + /*to access XCR0,'rcx' should be 0*/ + if (ctx_ptr->guest_cpu_regs.regs.rcx != 0) { + vcpu_inject_gp(vcpu); + return -1; + } + + val64 = ((ctx_ptr->guest_cpu_regs.regs.rax) & 0xffffffff) | + (ctx_ptr->guest_cpu_regs.regs.rdx << 32); + + /*bit 0(x87 state) of XCR0 can't be cleared*/ + if (!(val64 & 0x01)) { + vcpu_inject_gp(vcpu); + return -1; + } + + /*XCR0[2:1] (SSE state & AVX state) cannot be + *set to 10b as it is necessary to set both bits + *to use AVX instructions. + **/ + if (((val64 >> 1) & 0x3) == 0x2) { + vcpu_inject_gp(vcpu); + return -1; + } + + write_xcr(0, val64); return 0; } -#endif static int rdtsc_handler(struct vcpu *vcpu) { diff --git a/hypervisor/arch/x86/vmx.c b/hypervisor/arch/x86/vmx.c index fe53f16a5..702bd6425 100644 --- a/hypervisor/arch/x86/vmx.c +++ b/hypervisor/arch/x86/vmx.c @@ -931,6 +931,11 @@ static void init_exec_ctrl(struct vcpu *vcpu) exec_vmwrite(VMX_TPR_THRESHOLD, 0); } + if (is_xsave_supported()) { + exec_vmwrite64(VMX_XSS_EXITING_BITMAP_FULL, 0); + value32 |= VMX_PROCBASED_CTLS2_XSVE_XRSTR; + } + exec_vmwrite(VMX_PROC_VM_EXEC_CONTROLS2, value32); pr_dbg("VMX_PROC_VM_EXEC_CONTROLS2: 0x%x ", value32); diff --git a/hypervisor/include/arch/x86/cpu.h b/hypervisor/include/arch/x86/cpu.h index 0c5ae4219..60adb7eda 100644 --- a/hypervisor/include/arch/x86/cpu.h +++ b/hypervisor/include/arch/x86/cpu.h @@ -247,6 +247,7 @@ bool is_vapic_supported(void); bool is_vapic_intr_delivery_supported(void); bool is_vapic_virt_reg_supported(void); bool get_vmx_cap(void); +bool 
is_xsave_supported(void); /* Read control register */ #define CPU_CR_READ(cr, result_ptr) \ @@ -427,6 +428,15 @@ msr_write(uint32_t reg_num, uint64_t value64) CPU_MSR_WRITE(reg_num, value64); } +static inline void +write_xcr(int reg, uint64_t val) +{ + uint32_t low, high; + + low = val; + high = val >> 32; + asm volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); +} #else /* ASSEMBLER defined */ #endif /* ASSEMBLER defined */ diff --git a/hypervisor/include/arch/x86/vmx.h b/hypervisor/include/arch/x86/vmx.h index 317f1e1a6..da34ee543 100644 --- a/hypervisor/include/arch/x86/vmx.h +++ b/hypervisor/include/arch/x86/vmx.h @@ -83,6 +83,8 @@ #define VMX_EOI_EXIT3_FULL 0x00002022 #define VMX_EOI_EXIT3_HIGH 0x00002023 #define VMX_EOI_EXIT(vector) (VMX_EOI_EXIT0_FULL + ((vector) / 64) * 2) +#define VMX_XSS_EXITING_BITMAP_FULL 0x0000202C +#define VMX_XSS_EXITING_BITMAP_HIGH 0x0000202D /* 64-bit read-only data fields */ #define VMX_GUEST_PHYSICAL_ADDR_FULL 0x00002400 #define VMX_GUEST_PHYSICAL_ADDR_HIGH 0x00002401