/*
 * Copyright (C) 2021 Intel Corporation.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <types.h>
#include <logmsg.h>
#include <asm/mmu.h>
#include <asm/guest/virq.h>
#include <asm/guest/ept.h>
#include <asm/guest/vcpu.h>
#include <asm/guest/vm.h>
#include <asm/guest/vmcs.h>
#include <asm/guest/nested.h>
#include <asm/guest/vept.h>

/* Cache the content of MSR_IA32_VMX_BASIC */
static uint32_t vmx_basic;

static void disable_vmcs_shadowing(void);
static void clear_vvmcs(struct acrn_vcpu *vcpu, struct acrn_vvmcs *vvmcs);

/* The only purpose of this array is to serve the is_vmx_msr() function */
static const uint32_t vmx_msrs[NUM_VMX_MSRS] = {
	LIST_OF_VMX_MSRS
};

bool is_vmx_msr(uint32_t msr)
{
	bool found = false;
	uint32_t i;

	for (i = 0U; i < NUM_VMX_MSRS; i++) {
		if (msr == vmx_msrs[i]) {
			found = true;
			break;
		}
	}

	return found;
}

static uint64_t adjust_vmx_ctrls(uint32_t msr, uint64_t request_bits)
{
	union value_64 val64, msr_val;

	/*
	 * ISDM Appendix A.3, A.4, A.5:
	 * - Bits 31:0 indicate the allowed 0-settings of these controls.
	 *   Bit X of the corresponding VM-execution controls field is allowed to be 0
	 *   if bit X in the MSR is cleared to 0.
	 * - Bits 63:32 indicate the allowed 1-settings of these controls.
	 *   VM entry allows control X to be 1 if bit 32+X in the MSR is set to 1.
	 */
	msr_val.full = msr_read(msr);

	/*
	 * The reserved bits in VMCS Control fields could be 0 or 1, determined by the
	 * corresponding capability MSR, so they need to be read from the physical MSR.
	 *
	 * We consider the bits that are set in the allowed 0-settings group as the
	 * minimal set of bits that need to be set from the physical processor's perspective.
	 * Since we shadow this control field, we passthru the allowed 0-settings bits.
	 */
	val64.u.lo_32 = msr_val.u.lo_32;

	/* allowed 1-settings include those bits that are NOT allowed to be 0 */
	val64.u.hi_32 = msr_val.u.lo_32;

	/* make sure the requested features are supported by hardware */
	val64.u.hi_32 |= (msr_val.u.hi_32 & request_bits);

	return val64.full;
}
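
/*
 * Illustrative sketch (hypothetical values, not part of the emulation logic):
 * suppose the physical MSR_IA32_VMX_PINBASED_CTLS reads 0x0000007F00000016.
 *   - allowed 0-settings (bits 31:0)  = 0x00000016: these bits must be 1, so
 *     both halves of the emulated MSR report them as set.
 *   - allowed 1-settings (bits 63:32) = 0x0000007F: of these, only the bits
 *     that are also requested are exposed to L1.
 * With request_bits = 0x49, adjust_vmx_ctrls() would return
 *   lo_32 = 0x16, hi_32 = 0x16 | (0x7F & 0x49) = 0x5F,
 * i.e. the emulated MSR value 0x0000005F00000016.
 */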

/*
 * @pre vcpu != NULL
 */
void init_vmx_msrs(struct acrn_vcpu *vcpu)
{
	union value_64 val64;
	uint64_t request_bits, msr_value;

	if (is_nvmx_configured(vcpu->vm)) {
		/* MSR_IA32_VMX_BASIC */
		val64.full = VMCS12_REVISION_ID	/* Bits 30:0 - VMCS revision ID */
			| (4096UL << 32U)	/* Bits 44:32 - size of VMXON region and VMCS region */
			| (6UL << 50U)		/* Bits 53:50 - memory type for VMCS etc. (6: Write Back) */
			| (1UL << 54U)		/* Bit 54: VM-exit instruction-information for INS and OUTS */
			| (1UL << 55U);		/* Bit 55: VMX controls that default to 1 may be cleared to 0 */
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_BASIC, val64.full);

		/* MSR_IA32_VMX_MISC */

		/*
		 * Some bits need to be read from the physical MSR. For example, bits 4:0 report the
		 * relationship between the rate of the VMX-preemption timer and that of the timestamp
		 * counter (TSC).
		 */
		val64.full = msr_read(MSR_IA32_VMX_MISC);
		val64.u.hi_32 = 0U;

		/* Don't support Intel® Processor Trace (Intel PT) in VMX operation */
		val64.u.lo_32 &= ~(1U << 14U);

		/* Don't support SMM in VMX operation */
		val64.u.lo_32 &= ~((1U << 15U) | (1U << 28U));

		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_MISC, val64.full);

		/*
		 * TODO: These emulated VMX Control MSRs work for Tiger Lake and Kaby Lake;
		 * they may have problems if run on other platforms.
		 *
		 * We haven't yet put in the effort to enable as many features as possible.
		 */

		/* MSR_IA32_VMX_PINBASED_CTLS */
		request_bits = VMX_PINBASED_CTLS_IRQ_EXIT
			| VMX_PINBASED_CTLS_NMI_EXIT
			| VMX_PINBASED_CTLS_ENABLE_PTMR;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PINBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PINBASED_CTLS, msr_value);
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_TRUE_PINBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, msr_value);

		/* MSR_IA32_VMX_PROCBASED_CTLS */
		request_bits = VMX_PROCBASED_CTLS_IRQ_WIN | VMX_PROCBASED_CTLS_TSC_OFF
			| VMX_PROCBASED_CTLS_HLT | VMX_PROCBASED_CTLS_INVLPG
			| VMX_PROCBASED_CTLS_MWAIT | VMX_PROCBASED_CTLS_RDPMC
			| VMX_PROCBASED_CTLS_RDTSC | VMX_PROCBASED_CTLS_CR3_LOAD
			| VMX_PROCBASED_CTLS_CR3_STORE | VMX_PROCBASED_CTLS_CR8_LOAD
			| VMX_PROCBASED_CTLS_CR8_STORE | VMX_PROCBASED_CTLS_NMI_WINEXIT
			| VMX_PROCBASED_CTLS_MOV_DR | VMX_PROCBASED_CTLS_UNCOND_IO
			| VMX_PROCBASED_CTLS_MSR_BITMAP | VMX_PROCBASED_CTLS_MONITOR
			| VMX_PROCBASED_CTLS_PAUSE | VMX_PROCBASED_CTLS_SECONDARY;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS, msr_value);
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, msr_value);

		/* MSR_IA32_VMX_PROCBASED_CTLS2 */
		request_bits = VMX_PROCBASED_CTLS2_EPT | VMX_PROCBASED_CTLS2_RDTSCP
			| VMX_PROCBASED_CTLS2_VPID | VMX_PROCBASED_CTLS2_WBINVD
			| VMX_PROCBASED_CTLS2_UNRESTRICT | VMX_PROCBASED_CTLS2_PAUSE_LOOP
			| VMX_PROCBASED_CTLS2_RDRAND | VMX_PROCBASED_CTLS2_INVPCID
			| VMX_PROCBASED_CTLS2_RDSEED | VMX_PROCBASED_CTLS2_XSVE_XRSTR
			| VMX_PROCBASED_CTLS2_TSC_SCALING;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_PROCBASED_CTLS2, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2, msr_value);

		/* MSR_IA32_VMX_EXIT_CTLS */
		request_bits = VMX_EXIT_CTLS_SAVE_DBG | VMX_EXIT_CTLS_HOST_ADDR64
			| VMX_EXIT_CTLS_ACK_IRQ | VMX_EXIT_CTLS_LOAD_PAT
			| VMX_EXIT_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_EXIT_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EXIT_CTLS, msr_value);
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_TRUE_EXIT_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, msr_value);

		/* MSR_IA32_VMX_ENTRY_CTLS */
		request_bits = VMX_ENTRY_CTLS_LOAD_DBG | VMX_ENTRY_CTLS_IA32E_MODE
			| VMX_ENTRY_CTLS_LOAD_PERF | VMX_ENTRY_CTLS_LOAD_PAT
			| VMX_ENTRY_CTLS_LOAD_EFER;
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_ENTRY_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_ENTRY_CTLS, msr_value);
		msr_value = adjust_vmx_ctrls(MSR_IA32_VMX_TRUE_ENTRY_CTLS, request_bits);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_EPT_VPID_CAP);
		/*
		 * Hide 5 level EPT capability
		 * Hide accessed and dirty flags for EPT
		 */
		msr_value &= ~(VMX_EPT_PAGE_WALK_5 | VMX_EPT_AD | VMX_EPT_2MB_PAGE | VMX_EPT_1GB_PAGE);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, msr_value);

		/* For now passthru the value from physical MSR to L1 guest */
		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED0, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR0_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR0_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED0);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED0, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_CR4_FIXED1);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_CR4_FIXED1, msr_value);

		msr_value = msr_read(MSR_IA32_VMX_VMCS_ENUM);
		vcpu_set_guest_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, msr_value);
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t read_vmx_msr(struct acrn_vcpu *vcpu, uint32_t msr, uint64_t *val)
{
	uint64_t v = 0UL;
	int32_t err = 0;

	if (is_nvmx_configured(vcpu->vm)) {
		switch (msr) {
		case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
		case MSR_IA32_VMX_PINBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS:
		case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
		case MSR_IA32_VMX_PROCBASED_CTLS2:
		case MSR_IA32_VMX_EXIT_CTLS:
		case MSR_IA32_VMX_TRUE_EXIT_CTLS:
		case MSR_IA32_VMX_ENTRY_CTLS:
		case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
		case MSR_IA32_VMX_BASIC:
		case MSR_IA32_VMX_MISC:
		case MSR_IA32_VMX_EPT_VPID_CAP:
		case MSR_IA32_VMX_CR0_FIXED0:
		case MSR_IA32_VMX_CR0_FIXED1:
		case MSR_IA32_VMX_CR4_FIXED0:
		case MSR_IA32_VMX_CR4_FIXED1:
		case MSR_IA32_VMX_VMCS_ENUM:
		{
			v = vcpu_get_guest_msr(vcpu, msr);
			break;
		}
		/* Don't support these MSRs yet */
		case MSR_IA32_SMBASE:
		case MSR_IA32_VMX_PROCBASED_CTLS3:
		case MSR_IA32_VMX_VMFUNC:
		default:
			err = -EACCES;
			break;
		}
	} else {
		err = -EACCES;
	}

	*val = v;
	return err;
}

#define MAX_SHADOW_VMCS_FIELDS 113U
/*
 * VMCS fields included in the dual-purpose VMCS: as shadow for L1 and
 * as hardware VMCS for the nested guest (L2).
 *
 * TODO: This list is for TGL and CFL machines and the fields
 * for advanced APICv features such as Posted Interrupt and Virtual
 * Interrupt Delivery are not included, as these are not available
 * on those platforms.
 *
 * Certain fields, e.g. VMX_TSC_MULTIPLIER_FULL, are available only if
 * "use TSC scaling" is supported. Thus a static array may not work
 * for all platforms.
 */
static const uint32_t vmcs_shadowing_fields[MAX_SHADOW_VMCS_FIELDS] = {
	/* 16-bits */
	VMX_GUEST_ES_SEL,
	VMX_GUEST_CS_SEL,
	VMX_GUEST_SS_SEL,
	VMX_GUEST_DS_SEL,
	VMX_GUEST_FS_SEL,
	VMX_GUEST_GS_SEL,
	VMX_GUEST_LDTR_SEL,
	VMX_GUEST_TR_SEL,
	VMX_GUEST_PML_INDEX,

	/* 64-bits */
	VMX_IO_BITMAP_A_FULL,
	VMX_IO_BITMAP_B_FULL,
	VMX_EXIT_MSR_STORE_ADDR_FULL,
	VMX_EXIT_MSR_LOAD_ADDR_FULL,
	VMX_ENTRY_MSR_LOAD_ADDR_FULL,
	VMX_EXECUTIVE_VMCS_PTR_FULL,
	VMX_TSC_OFFSET_FULL,
	VMX_VIRTUAL_APIC_PAGE_ADDR_FULL,
	VMX_APIC_ACCESS_ADDR_FULL,
	VMX_VMREAD_BITMAP_FULL,
	VMX_VMWRITE_BITMAP_FULL,
	VMX_XSS_EXITING_BITMAP_FULL,
	VMX_TSC_MULTIPLIER_FULL,
	VMX_GUEST_PHYSICAL_ADDR_FULL,
	VMX_VMS_LINK_PTR_FULL,
	VMX_GUEST_IA32_DEBUGCTL_FULL,
	VMX_GUEST_IA32_PAT_FULL,
	VMX_GUEST_IA32_EFER_FULL,
	VMX_GUEST_IA32_PERF_CTL_FULL,
	VMX_GUEST_PDPTE0_FULL,
	VMX_GUEST_PDPTE1_FULL,
	VMX_GUEST_PDPTE2_FULL,
	VMX_GUEST_PDPTE3_FULL,

	/* 32-bits */
	VMX_PIN_VM_EXEC_CONTROLS,
	VMX_PROC_VM_EXEC_CONTROLS,
	VMX_EXCEPTION_BITMAP,
	VMX_PF_ERROR_CODE_MASK,
	VMX_PF_ERROR_CODE_MATCH,
	VMX_CR3_TARGET_COUNT,
	VMX_EXIT_MSR_STORE_COUNT,
	VMX_EXIT_MSR_LOAD_COUNT,
	VMX_ENTRY_MSR_LOAD_COUNT,
	VMX_ENTRY_INT_INFO_FIELD,
	VMX_ENTRY_EXCEPTION_ERROR_CODE,
	VMX_ENTRY_INSTR_LENGTH,
	VMX_TPR_THRESHOLD,
	VMX_PROC_VM_EXEC_CONTROLS2,
	VMX_PLE_GAP,
	VMX_PLE_WINDOW,
	VMX_INSTR_ERROR,
	VMX_EXIT_REASON,
	VMX_EXIT_INT_INFO,
	VMX_EXIT_INT_ERROR_CODE,
	VMX_IDT_VEC_INFO_FIELD,
	VMX_IDT_VEC_ERROR_CODE,
	VMX_EXIT_INSTR_LEN,
	VMX_INSTR_INFO,
	VMX_GUEST_ES_LIMIT,
	VMX_GUEST_CS_LIMIT,
	VMX_GUEST_SS_LIMIT,
	VMX_GUEST_DS_LIMIT,
	VMX_GUEST_FS_LIMIT,
	VMX_GUEST_GS_LIMIT,
	VMX_GUEST_LDTR_LIMIT,
	VMX_GUEST_TR_LIMIT,
	VMX_GUEST_GDTR_LIMIT,
	VMX_GUEST_IDTR_LIMIT,
	VMX_GUEST_ES_ATTR,
	VMX_GUEST_CS_ATTR,
	VMX_GUEST_SS_ATTR,
	VMX_GUEST_DS_ATTR,
	VMX_GUEST_FS_ATTR,
	VMX_GUEST_GS_ATTR,
	VMX_GUEST_LDTR_ATTR,
	VMX_GUEST_TR_ATTR,
	VMX_GUEST_INTERRUPTIBILITY_INFO,
	VMX_GUEST_ACTIVITY_STATE,
	VMX_GUEST_SMBASE,
	VMX_GUEST_IA32_SYSENTER_CS,
	VMX_GUEST_TIMER,
	VMX_CR0_GUEST_HOST_MASK,
	VMX_CR4_GUEST_HOST_MASK,
	VMX_CR0_READ_SHADOW,
	VMX_CR4_READ_SHADOW,
	VMX_CR3_TARGET_0,
	VMX_CR3_TARGET_1,
	VMX_CR3_TARGET_2,
	VMX_CR3_TARGET_3,
	VMX_EXIT_QUALIFICATION,
	VMX_IO_RCX,
	VMX_IO_RSI,
	VMX_IO_RDI,
	VMX_IO_RIP,
	VMX_GUEST_LINEAR_ADDR,
	VMX_GUEST_CR0,
	VMX_GUEST_CR3,
	VMX_GUEST_CR4,
	VMX_GUEST_ES_BASE,
	VMX_GUEST_CS_BASE,
	VMX_GUEST_SS_BASE,
	VMX_GUEST_DS_BASE,
	VMX_GUEST_FS_BASE,
	VMX_GUEST_GS_BASE,
	VMX_GUEST_LDTR_BASE,
	VMX_GUEST_TR_BASE,
	VMX_GUEST_GDTR_BASE,
	VMX_GUEST_IDTR_BASE,
	VMX_GUEST_DR7,
	VMX_GUEST_RSP,
	VMX_GUEST_RIP,
	VMX_GUEST_RFLAGS,
	VMX_GUEST_PENDING_DEBUG_EXCEPT,
	VMX_GUEST_IA32_SYSENTER_ESP,
	VMX_GUEST_IA32_SYSENTER_EIP
};

/* to be shared by all vCPUs for all nested guests */
static uint64_t vmcs_shadowing_bitmap[PAGE_SIZE / sizeof(uint64_t)] __aligned(PAGE_SIZE);

static void setup_vmcs_shadowing_bitmap(void)
{
	uint16_t field_index;
	uint32_t array_index;
	uint16_t bit_pos;

	/*
	 * Set all the bits to 1s first, then clear the bits for the
	 * fields that ACRN lets its guest access in the shadow VMCS.
	 */
	memset((void *)vmcs_shadowing_bitmap, 0xFFU, PAGE_SIZE);

	/*
	 * Refer to ISDM Section 24.6.15 VMCS Shadowing Bitmap Addresses
	 * and Section 30.3 VMX Instructions - VMWRITE/VMREAD
	 */
	for (field_index = 0U; field_index < MAX_SHADOW_VMCS_FIELDS; field_index++) {
		bit_pos = vmcs_shadowing_fields[field_index] % 64U;
		array_index = vmcs_shadowing_fields[field_index] / 64U;
		bitmap_clear_nolock(bit_pos, &vmcs_shadowing_bitmap[array_index]);
	}
}
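
/*
 * Illustrative sketch of the bitmap indexing above (an assumed example, not
 * required by the code): for a field encoding such as 0x681E (VMX_GUEST_RIP),
 *   bit_pos     = 0x681E % 64 = 30
 *   array_index = 0x681E / 64 = 0x1A0
 * so clearing bit 30 of vmcs_shadowing_bitmap[0x1A0] lets L1 VMREAD/VMWRITE
 * that field directly in the shadow VMCS without causing a VM exit.
 */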

/*
 * This is an array of offsets into a structure of type "struct acrn_vmcs12":
 * 16 offsets for a total of 16 GROUPs, 4 "field widths" by 4 "field types".
 * "Field type" is either Control, Read-Only Data, Guest State or Host State.
 * Refer to the definition of "struct acrn_vmcs12" on how the fields are
 * grouped together for these offsets to work in tandem.
 * Refer to Intel SDM Appendix B Field Encoding in VMCS for info on how
 * fields are grouped and indexed within a group.
 */
static const uint16_t vmcs12_group_offset_table[16] = {
	offsetof(struct acrn_vmcs12, vpid),			/* 16-bit Control Fields */
	offsetof(struct acrn_vmcs12, padding),			/* 16-bit Read-Only Fields */
	offsetof(struct acrn_vmcs12, guest_es),			/* 16-bit Guest-State Fields */
	offsetof(struct acrn_vmcs12, host_es),			/* 16-bit Host-State Fields */
	offsetof(struct acrn_vmcs12, io_bitmap_a),		/* 64-bit Control Fields */
	offsetof(struct acrn_vmcs12, guest_phys_addr),		/* 64-bit Read-Only Data Fields */
	offsetof(struct acrn_vmcs12, vmcs_link_ptr),		/* 64-bit Guest-State Fields */
	offsetof(struct acrn_vmcs12, host_ia32_pat),		/* 64-bit Host-State Fields */
	offsetof(struct acrn_vmcs12, pin_based_exec_ctrl),	/* 32-bit Control Fields */
	offsetof(struct acrn_vmcs12, vm_instr_error),		/* 32-bit Read-Only Data Fields */
	offsetof(struct acrn_vmcs12, guest_es_limit),		/* 32-bit Guest-State Fields */
	offsetof(struct acrn_vmcs12, host_ia32_sysenter_cs),	/* 32-bit Host-State Fields */
	offsetof(struct acrn_vmcs12, cr0_guest_host_mask),	/* Natural-width Control Fields */
	offsetof(struct acrn_vmcs12, exit_qual),		/* Natural-width Read-Only Data Fields */
	offsetof(struct acrn_vmcs12, guest_cr0),		/* Natural-width Guest-State Fields */
	offsetof(struct acrn_vmcs12, host_cr0),			/* Natural-width Host-State Fields */
};

/*
 * field_idx is the index of the field within the group.
 *
 * Access-type is 0 for all widths except for 64-bit.
 * For 64-bit, if Access-type is 1, the offset is moved to the
 * high 4 bytes of the field.
 */
#define OFFSET_INTO_VMCS12(group_idx, field_idx, width_in_bytes, access_type) \
	(vmcs12_group_offset_table[group_idx] + \
	field_idx * width_in_bytes + \
	access_type * sizeof(uint32_t))

/* Given a vmcs field, this API returns the offset into "struct acrn_vmcs12" */
static uint16_t vmcs_field_to_vmcs12_offset(uint32_t vmcs_field)
{
	/*
	 * Refer to Appendix B Field Encoding in VMCS in SDM.
	 * A value of group index 0001b is not valid because there are no 16-bit
	 * Read-Only fields.
	 *
	 * TODO: check invalid VMCS field
	 */
	uint16_t group_idx = (VMX_VMCS_FIELD_WIDTH(vmcs_field) << 2U) | VMX_VMCS_FIELD_TYPE(vmcs_field);
	uint8_t field_width = VMX_VMCS_FIELD_WIDTH(vmcs_field);
	uint8_t width_in_bytes;

	if (field_width == VMX_VMCS_FIELD_WIDTH_16) {
		width_in_bytes = 2U;
	} else if (field_width == VMX_VMCS_FIELD_WIDTH_32) {
		width_in_bytes = 4U;
	} else {
		/*
		 * Natural-width or 64-bit
		 */
		width_in_bytes = 8U;
	}

	return OFFSET_INTO_VMCS12(group_idx,
		VMX_VMCS_FIELD_INDEX(vmcs_field), width_in_bytes, /* field index within the group */
		VMX_VMCS_FIELD_ACCESS_HIGH(vmcs_field));
}
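
/*
 * Illustrative sketch of the lookup, assuming the ISDM Appendix B layout:
 * for encoding 0x440C (VM-exit instruction length), width = 2 (32-bit),
 * type = 1 (read-only data), index = 6 and access-type = 0, so
 *   group_idx = (2 << 2) | 1 = 9  ->  offsetof(struct acrn_vmcs12, vm_instr_error)
 *   offset    = group offset + 6 * 4 + 0
 * which lands on the seventh 32-bit read-only data field cached in acrn_vmcs12.
 */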

/*
 * Given a vmcs field and the pointer to the vmcs12, this API returns the
 * corresponding value from the VMCS
 */
static uint64_t vmcs12_read_field(void *vmcs_hva, uint32_t field)
{
	uint64_t *ptr = (uint64_t *)(vmcs_hva + vmcs_field_to_vmcs12_offset(field));
	uint64_t val64 = 0UL;

	switch (VMX_VMCS_FIELD_WIDTH(field)) {
	case VMX_VMCS_FIELD_WIDTH_16:
		val64 = *(uint16_t *)ptr;
		break;
	case VMX_VMCS_FIELD_WIDTH_32:
		val64 = *(uint32_t *)ptr;
		break;
	case VMX_VMCS_FIELD_WIDTH_64:
		if (!!VMX_VMCS_FIELD_ACCESS_HIGH(field)) {
			val64 = *(uint32_t *)ptr;
		} else {
			val64 = *ptr;
		}
		break;
	case VMX_VMCS_FIELD_WIDTH_NATURAL:
	default:
		val64 = *ptr;
		break;
	}

	return val64;
}

/*
 * Write the given VMCS field to the given vmcs12 data structure.
 */
static void vmcs12_write_field(void *vmcs_hva, uint32_t field, uint64_t val64)
{
	uint64_t *ptr = (uint64_t *)(vmcs_hva + vmcs_field_to_vmcs12_offset(field));

	switch (VMX_VMCS_FIELD_WIDTH(field)) {
	case VMX_VMCS_FIELD_WIDTH_16:
		*(uint16_t *)ptr = (uint16_t)val64;
		break;
	case VMX_VMCS_FIELD_WIDTH_32:
		*(uint32_t *)ptr = (uint32_t)val64;
		break;
	case VMX_VMCS_FIELD_WIDTH_64:
		if (!!VMX_VMCS_FIELD_ACCESS_HIGH(field)) {
			*(uint32_t *)ptr = (uint32_t)val64;
		} else {
			*ptr = val64;
		}
		break;
	case VMX_VMCS_FIELD_WIDTH_NATURAL:
	default:
		*ptr = val64;
		break;
	}
}

void nested_vmx_result(enum VMXResult result, int error_number)
{
	uint64_t rflags = exec_vmread(VMX_GUEST_RFLAGS);

	/* ISDM: section 30.2 CONVENTIONS */
	rflags &= ~(RFLAGS_C | RFLAGS_P | RFLAGS_A | RFLAGS_Z | RFLAGS_S | RFLAGS_O);

	if (result == VMfailValid) {
		rflags |= RFLAGS_Z;
		exec_vmwrite(VMX_INSTR_ERROR, error_number);
	} else if (result == VMfailInvalid) {
		rflags |= RFLAGS_C;
	} else {
		/* VMsucceed, do nothing */
	}

	if (result != VMsucceed) {
		pr_err("VMX failed: %d/%d", result, error_number);
	}

	exec_vmwrite(VMX_GUEST_RFLAGS, rflags);
}

/**
 * @brief get the memory-address operand of a vmx instruction
 *
 * @pre vcpu != NULL
 */
static uint64_t get_vmx_memory_operand(struct acrn_vcpu *vcpu, uint32_t instr_info)
{
	uint64_t gva, gpa, seg_base = 0UL;
	uint32_t seg, err_code = 0U;
	uint64_t offset;

	/*
	 * According to ISDM 3B: Basic VM-Exit Information: For INVEPT, INVPCID, INVVPID, LGDT,
	 * LIDT, LLDT, LTR, SGDT, SIDT, SLDT, STR, VMCLEAR, VMPTRLD, VMPTRST, VMREAD, VMWRITE,
	 * VMXON, XRSTORS, and XSAVES, the exit qualification receives the value of the instruction's
	 * displacement field, which is sign-extended to 64 bits.
	 */
	offset = vcpu->arch.exit_qualification;

	/* TODO: should we consider the cases where the address size (bits 9:7 in instr_info) is 16 or 32? */

	/*
	 * Refer to ISDM Vol.1-3-24 Operand addressing on how to calculate an effective address:
	 * offset = base + [index * scale] + displacement
	 * address = segment_base + offset
	 */
	if (VMX_II_BASE_REG_VALID(instr_info)) {
		offset += vcpu_get_gpreg(vcpu, VMX_II_BASE_REG(instr_info));
	}

	if (VMX_II_IDX_REG_VALID(instr_info)) {
		uint64_t val64 = vcpu_get_gpreg(vcpu, VMX_II_IDX_REG(instr_info));
		offset += (val64 << VMX_II_SCALING(instr_info));
	}

	/*
	 * In 64-bit mode, the processor treats the segment base of CS, DS, ES, SS as zero,
	 * creating a linear address that is equal to the effective address.
	 * The exceptions are the FS and GS segments, whose segment registers can be used as
	 * additional base registers in some linear address calculations.
	 */
	seg = VMX_II_SEG_REG(instr_info);
	if (seg == 4U) {
		seg_base = exec_vmread(VMX_GUEST_FS_BASE);
	}

	if (seg == 5U) {
		seg_base = exec_vmread(VMX_GUEST_GS_BASE);
	}

	gva = seg_base + offset;
	(void)gva2gpa(vcpu, gva, &gpa, &err_code);

	return gpa;
}
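
/*
 * Illustrative sketch with hypothetical register values: for "VMPTRLD [rax + rbx*4]",
 * the VM-exit instruction-information field reports segment = DS, base = RAX (valid),
 * index = RBX (valid) and scaling = 2, while the exit qualification holds the
 * displacement (0 here). With RAX = 0x1000 and RBX = 0x20 the GVA is
 *   0 (DS base in 64-bit mode) + 0x1000 + (0x20 << 2) + 0 = 0x1080,
 * which gva2gpa() then translates through the guest page tables into the GPA
 * the caller reads the VMX pointer from.
 */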

/*
 * @pre vcpu != NULL
 */
static uint64_t get_vmptr_gpa(struct acrn_vcpu *vcpu)
{
	uint64_t gpa, vmptr;

	/* get VMX pointer, which points to the VMCS or VMXON region GPA */
	gpa = get_vmx_memory_operand(vcpu, exec_vmread(VMX_INSTR_INFO));

	/* get the address (GPA) of the VMCS for VMPTRLD/VMCLEAR, or VMXON region for VMXON */
	(void)copy_from_gpa(vcpu->vm, (void *)&vmptr, gpa, sizeof(uint64_t));

	return vmptr;
}

static bool validate_vmptr_gpa(uint64_t vmptr_gpa)
{
	/* We don't emulate CPUID.80000008H for guests, so check with physical address width */
	struct cpuinfo_x86 *cpu_info = get_pcpu_info();

	return (mem_aligned_check(vmptr_gpa, PAGE_SIZE) && ((vmptr_gpa >> cpu_info->phys_bits) == 0UL));
}

/**
 * @pre vm != NULL
 */
static bool validate_vmcs_revision_id(struct acrn_vcpu *vcpu, uint64_t vmptr_gpa)
{
	uint32_t revision_id;

	(void)copy_from_gpa(vcpu->vm, (void *)&revision_id, vmptr_gpa, sizeof(uint32_t));

	/*
	 * The VMCS revision ID must equal what is reported by the emulated IA32_VMX_BASIC MSR.
	 * VMCS12_REVISION_ID never has bit 31 set, so the following comparison implicitly
	 * validates revision_id[31] as well.
	 */
	return (revision_id == VMCS12_REVISION_ID);
}

int32_t get_guest_cpl(void)
{
	/*
	 * We get CPL from SS.DPL because:
	 *
	 * CS.DPL may not equal the CPL for conforming code segments. ISDM 5.5 PRIVILEGE LEVELS:
	 * Conforming code segments can be accessed from any privilege level that is equal to or
	 * numerically greater (less privileged) than the DPL of the conforming code segment.
	 *
	 * ISDM 24.4.1 Guest Register State: The value of the DPL field for SS is always
	 * equal to the logical processor's current privilege level (CPL).
	 */
	uint32_t ar = exec_vmread32(VMX_GUEST_SS_ATTR);
	return ((ar >> 5) & 3);
}
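
/*
 * Illustrative sketch (hypothetical access-rights value): with a flat 64-bit
 * ring-0 stack segment the SS access-rights field might read 0xC093; bits 6:5
 * hold the DPL, so ((0xC093 >> 5) & 3) == 0, i.e. the guest is running at CPL 0.
 */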

static bool validate_nvmx_cr0_cr4(uint64_t cr0_4, uint64_t fixed0, uint64_t fixed1)
{
	bool valid = true;

	/* If bit X is 1 in IA32_VMX_CR0/4_FIXED0, then that bit of CR0/4 is fixed to 1 in VMX operation */
	if ((cr0_4 & fixed0) != fixed0) {
		valid = false;
	}

	/* if bit X is 0 in IA32_VMX_CR0/4_FIXED1, then that bit of CR0/4 is fixed to 0 in VMX operation */
	/* Bits 63:32 of CR0 and CR4 are reserved and must be written with zeros */
	if ((uint32_t)(~cr0_4 & ~fixed1) != (uint32_t)~fixed1) {
		valid = false;
	}

	return valid;
}
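
/*
 * Illustrative sketch with hypothetical mask values: suppose fixed0 = 0x21
 * (bits 0 and 5 must be 1) and fixed1 = 0x7FF (bits above 10 must be 0).
 *   - cr0_4 = 0x31   -> valid:   0x31 & 0x21 == 0x21 and no bit above 10 is set.
 *   - cr0_4 = 0x30   -> invalid: bit 0 is clear, failing the FIXED0 check.
 *   - cr0_4 = 0x1021 -> invalid: bit 12 is set although FIXED1 says it must be 0.
 */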

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr0(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr0(vcpu), msr_read(MSR_IA32_VMX_CR0_FIXED0),
		msr_read(MSR_IA32_VMX_CR0_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
static bool validate_nvmx_cr4(struct acrn_vcpu *vcpu)
{
	return validate_nvmx_cr0_cr4(vcpu_get_cr4(vcpu), msr_read(MSR_IA32_VMX_CR4_FIXED0),
		msr_read(MSR_IA32_VMX_CR4_FIXED1));
}

/*
 * @pre vcpu != NULL
 */
static void reset_vvmcs(struct acrn_vcpu *vcpu)
{
	struct acrn_vvmcs *vvmcs;
	uint32_t idx;

	vcpu->arch.nested.current_vvmcs = NULL;

	for (idx = 0U; idx < MAX_ACTIVE_VVMCS_NUM; idx++) {
		vvmcs = &vcpu->arch.nested.vvmcs[idx];
		vvmcs->host_state_dirty = false;
		vvmcs->control_fields_dirty = false;
		vvmcs->vmcs12_gpa = INVALID_GPA;
		vvmcs->ref_cnt = 0;

		(void)memset(vvmcs->vmcs02, 0U, PAGE_SIZE);
		(void)memset(&vvmcs->vmcs12, 0U, sizeof(struct acrn_vmcs12));
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t vmxon_vmexit_handler(struct acrn_vcpu *vcpu)
{
	const uint64_t features = MSR_IA32_FEATURE_CONTROL_LOCK | MSR_IA32_FEATURE_CONTROL_VMX_NO_SMX;
	uint32_t ar = exec_vmread32(VMX_GUEST_CS_ATTR);

	if (is_nvmx_configured(vcpu->vm)) {
		if (((vcpu_get_cr0(vcpu) & CR0_PE) == 0UL)
			|| ((vcpu_get_cr4(vcpu) & CR4_VMXE) == 0UL)
			|| ((vcpu_get_rflags(vcpu) & RFLAGS_VM) != 0U)) {
			vcpu_inject_ud(vcpu);
		} else if (((vcpu_get_efer(vcpu) & MSR_IA32_EFER_LMA_BIT) == 0U)
			|| ((ar & (1U << 13U)) == 0U)) {
			/* Current ACRN doesn't support a 32-bit L1 hypervisor */
			vcpu_inject_ud(vcpu);
		} else if ((get_guest_cpl() != 0)
			|| !validate_nvmx_cr0(vcpu)
			|| !validate_nvmx_cr4(vcpu)
			|| ((vcpu_get_guest_msr(vcpu, MSR_IA32_FEATURE_CONTROL) & features) != features)) {
			vcpu_inject_gp(vcpu, 0U);
		} else if (vcpu->arch.nested.vmxon == true) {
			nested_vmx_result(VMfailValid, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
		} else {
			uint64_t vmptr_gpa = get_vmptr_gpa(vcpu);

			if (!validate_vmptr_gpa(vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else if (!validate_vmcs_revision_id(vcpu, vmptr_gpa)) {
				nested_vmx_result(VMfailInvalid, 0);
			} else {
				vcpu->arch.nested.vmxon = true;
				vcpu->arch.nested.in_l2_guest = false;
				vcpu->arch.nested.vmxon_ptr = vmptr_gpa;

				reset_vvmcs(vcpu);
				nested_vmx_result(VMsucceed, 0);
			}
		}
	} else {
		vcpu_inject_ud(vcpu);
	}

	return 0;
}

/*
 * @pre vcpu != NULL
 */
bool check_vmx_permission(struct acrn_vcpu *vcpu)
{
	bool permit = true;

	/* If this VM is not nVMX enabled, it implies that 'vmxon == false' */
	if ((vcpu->arch.nested.vmxon == false)
		|| ((vcpu_get_cr0(vcpu) & CR0_PE) == 0UL)
		|| ((vcpu_get_rflags(vcpu) & RFLAGS_VM) != 0U)) {
		/* We rely on hardware to check "IA32_EFER.LMA = 1 and CS.L = 0" */
		vcpu_inject_ud(vcpu);
		permit = false;
	} else if (get_guest_cpl() != 0) {
		vcpu_inject_gp(vcpu, 0U);
		permit = false;
	}

	return permit;
}

/*
 * @pre vcpu != NULL
 * @pre vcpu->vm != NULL
 */
int32_t vmxoff_vmexit_handler(struct acrn_vcpu *vcpu)
{
	if (check_vmx_permission(vcpu)) {
		disable_vmcs_shadowing();

		vcpu->arch.nested.vmxon = false;
		vcpu->arch.nested.in_l2_guest = false;

		reset_vvmcs(vcpu);
		nested_vmx_result(VMsucceed, 0);
	}

	return 0;
}

/*
 * Only VMCS fields of width 64-bit, 32-bit, and natural-width can be
 * read-only. A value of 1 in bits [11:10] of these field encodings
 * indicates a read-only field. ISDM Appendix B.
 */
static inline bool is_ro_vmcs_field(uint32_t field)
{
	const uint8_t w = VMX_VMCS_FIELD_WIDTH(field);
	return (VMX_VMCS_FIELD_WIDTH_16 != w) && (VMX_VMCS_FIELD_TYPE(field) == 1U);
}
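
/*
 * Illustrative sketch: the exit qualification field is encoded as 0x6400,
 * i.e. natural width (bits 14:13 = 3) with type bits 11:10 = 1 (read-only
 * data), so is_ro_vmcs_field(0x6400) is true. A VMWRITE from L1 to such a
 * field is rejected in vmwrite_vmexit_handler() unless bit 29 of the emulated
 * IA32_VMX_MISC MSR advertises support for writing VM-exit information fields.
 */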

/*
 * @pre vcpu != NULL
 */
static struct acrn_vvmcs *lookup_vvmcs(struct acrn_vcpu *vcpu, uint64_t vmcs12_gpa)
{
	struct acrn_vvmcs *vvmcs = NULL;
	uint32_t idx;

	for (idx = 0U; idx < MAX_ACTIVE_VVMCS_NUM; idx++) {
		if (vcpu->arch.nested.vvmcs[idx].vmcs12_gpa == vmcs12_gpa) {
			vvmcs = &vcpu->arch.nested.vvmcs[idx];
			break;
		}
	}

	return vvmcs;
}

/*
 * @pre vcpu != NULL
 */
static struct acrn_vvmcs *get_or_replace_vvmcs_entry(struct acrn_vcpu *vcpu)
{
	struct acrn_nested *nested = &vcpu->arch.nested;
	struct acrn_vvmcs *vvmcs = NULL;
	uint32_t idx, min_cnt = ~0U;

	/* look for an inactive entry first */
	for (idx = 0U; idx < MAX_ACTIVE_VVMCS_NUM; idx++) {
		if (nested->vvmcs[idx].vmcs12_gpa == INVALID_GPA) {
			/* found an inactive vvmcs[] entry. */
			vvmcs = &nested->vvmcs[idx];
			break;
		}
	}

	/* In case we have to release an active entry to make room for the new VMCS12 */
	if (vvmcs == NULL) {
		for (idx = 0U; idx < MAX_ACTIVE_VVMCS_NUM; idx++) {
			/* look for the entry with the least reference count */
			if (nested->vvmcs[idx].ref_cnt < min_cnt) {
				min_cnt = nested->vvmcs[idx].ref_cnt;
				vvmcs = &nested->vvmcs[idx];
			}
		}

		clear_vvmcs(vcpu, vvmcs);
	}

	/* reset ref_cnt for all entries */
	for (idx = 0U; idx < MAX_ACTIVE_VVMCS_NUM; idx++) {
		nested->vvmcs[idx].ref_cnt = 0U;
	}

	return vvmcs;
}

/*
 * @brief emulate VMREAD instruction from L1
 * @pre vcpu != NULL
 */
int32_t vmread_vmexit_handler(struct acrn_vcpu *vcpu)
{
	struct acrn_vvmcs *cur_vvmcs = vcpu->arch.nested.current_vvmcs;
	const uint32_t info = exec_vmread(VMX_INSTR_INFO);
	uint64_t vmcs_value, gpa;
	uint32_t vmcs_field;

	if (check_vmx_permission(vcpu)) {
		if ((cur_vvmcs == NULL) || (cur_vvmcs->vmcs12_gpa == INVALID_GPA)) {
			nested_vmx_result(VMfailInvalid, 0);
		} else {
			/* TODO: VMfailValid for invalid VMCS fields */
			vmcs_field = (uint32_t)vcpu_get_gpreg(vcpu, VMX_II_REG2(info));
			vmcs_value = vmcs12_read_field(&cur_vvmcs->vmcs12, vmcs_field);

			/* Currently ACRN doesn't support a 32-bit L1 hypervisor, so operands are assumed to be 64 bits */
			if (VMX_II_IS_REG(info)) {
				vcpu_set_gpreg(vcpu, VMX_II_REG1(info), vmcs_value);
			} else {
				gpa = get_vmx_memory_operand(vcpu, info);
				(void)copy_to_gpa(vcpu->vm, &vmcs_value, gpa, 8U);
			}

			pr_dbg("vmcs_field: %x vmcs_value: %llx", vmcs_field, vmcs_value);
			nested_vmx_result(VMsucceed, 0);
		}
	}

	return 0;
}

/*
 * @brief emulate VMWRITE instruction from L1
 * @pre vcpu != NULL
 */
int32_t vmwrite_vmexit_handler(struct acrn_vcpu *vcpu)
{
	struct acrn_vvmcs *cur_vvmcs = vcpu->arch.nested.current_vvmcs;
	const uint32_t info = exec_vmread(VMX_INSTR_INFO);
	uint64_t vmcs_value, gpa;
	uint32_t vmcs_field;

	if (check_vmx_permission(vcpu)) {
		if ((cur_vvmcs == NULL) || (cur_vvmcs->vmcs12_gpa == INVALID_GPA)) {
			nested_vmx_result(VMfailInvalid, 0);
		} else {
			/* TODO: VMfailValid for invalid VMCS fields */
			vmcs_field = (uint32_t)vcpu_get_gpreg(vcpu, VMX_II_REG2(info));

			if (is_ro_vmcs_field(vmcs_field) &&
				((vcpu_get_guest_msr(vcpu, MSR_IA32_VMX_MISC) & (1UL << 29U)) == 0UL)) {
				nested_vmx_result(VMfailValid, VMXERR_VMWRITE_RO_COMPONENT);
			} else {
				/* A 32-bit L1 hypervisor is currently not supported, so operands are assumed to be 64 bits */
				if (VMX_II_IS_REG(info)) {
					vmcs_value = vcpu_get_gpreg(vcpu, VMX_II_REG1(info));
				} else {
					gpa = get_vmx_memory_operand(vcpu, info);
					(void)copy_from_gpa(vcpu->vm, &vmcs_value, gpa, 8U);
				}

				if (VMX_VMCS_FIELD_TYPE(vmcs_field) == VMX_VMCS_FIELD_TYPE_HOST) {
					cur_vvmcs->host_state_dirty = true;
				}

				if ((vmcs_field == VMX_MSR_BITMAP_FULL)
					|| (vmcs_field == VMX_EPT_POINTER_FULL)
					|| (vmcs_field == VMX_VPID)
					|| (vmcs_field == VMX_ENTRY_CONTROLS)
					|| (vmcs_field == VMX_EXIT_CONTROLS)) {
					cur_vvmcs->control_fields_dirty = true;

					if (vmcs_field == VMX_EPT_POINTER_FULL) {
						if (cur_vvmcs->vmcs12.ept_pointer != vmcs_value) {
							put_vept_desc(cur_vvmcs->vmcs12.ept_pointer);
							get_vept_desc(vmcs_value);
						}
					}
				}

				pr_dbg("vmcs_field: %x vmcs_value: %llx", vmcs_field, vmcs_value);
				vmcs12_write_field(&cur_vvmcs->vmcs12, vmcs_field, vmcs_value);
				nested_vmx_result(VMsucceed, 0);
			}
		}
	}

	return 0;
}

/**
 * @brief Sync shadow fields from vmcs02 to cache VMCS12
 *
 * @pre vcpu != NULL
 * @pre vmcs02 is current
 */
static void sync_vmcs02_to_vmcs12(struct acrn_vmcs12 *vmcs12)
{
	uint64_t val64;
	uint32_t idx;

	for (idx = 0; idx < MAX_SHADOW_VMCS_FIELDS; idx++) {
		val64 = exec_vmread(vmcs_shadowing_fields[idx]);
		vmcs12_write_field(vmcs12, vmcs_shadowing_fields[idx], val64);
	}
}

/*
 * @pre vcpu != NULL
 * @pre VMCS02 (as an ordinary VMCS) is current
 */
static void merge_and_sync_control_fields(struct acrn_vcpu *vcpu, struct acrn_vmcs12 *vmcs12)
{
	uint64_t value64;

	/* Sync VMCS fields that are not shadowing. These fields don't need to be synced back to VMCS12. */

	exec_vmwrite(VMX_MSR_BITMAP_FULL, gpa2hpa(vcpu->vm, vmcs12->msr_bitmap));
	exec_vmwrite(VMX_EPT_POINTER_FULL, get_shadow_eptp(vmcs12->ept_pointer));

	/* For VM-execution, entry and exit controls */
	value64 = vmcs12->vm_entry_controls;
	if ((value64 & VMX_ENTRY_CTLS_LOAD_EFER) != VMX_ENTRY_CTLS_LOAD_EFER) {
		/*
		 * The L1 hypervisor wishes to use its IA32_EFER for the L2 guest, so we turn on
		 * VMX_ENTRY_CTLS_LOAD_EFER in VMCS02.
		 */
		value64 |= VMX_ENTRY_CTLS_LOAD_EFER;
		exec_vmwrite(VMX_GUEST_IA32_EFER_FULL, vcpu_get_efer(vcpu));
	}

	exec_vmwrite(VMX_ENTRY_CONTROLS, value64);

	/* Host is always running in 64-bit mode */
	value64 = vmcs12->vm_exit_controls | VMX_EXIT_CTLS_HOST_ADDR64;
	exec_vmwrite(VMX_EXIT_CONTROLS, value64);

	exec_vmwrite(VMX_VPID, vmcs12->vpid);
}

/**
 * @brief Sync shadow fields from vmcs12 to vmcs02
 *
 * @pre vcpu != NULL
 * @pre vmcs02 is current
 */
static void sync_vmcs12_to_vmcs02(struct acrn_vcpu *vcpu, struct acrn_vmcs12 *vmcs12)
{
	uint64_t val64;
	uint32_t idx;

	for (idx = 0; idx < MAX_SHADOW_VMCS_FIELDS; idx++) {
		val64 = vmcs12_read_field(vmcs12, vmcs_shadowing_fields[idx]);
		exec_vmwrite(vmcs_shadowing_fields[idx], val64);
	}

	merge_and_sync_control_fields(vcpu, vmcs12);
}

/*
 * @pre vcpu != NULL
 */
static void set_vmcs02_shadow_indicator(struct acrn_vvmcs *vvmcs)
{
	/* vmcs02 is shadowing */
	*((uint32_t*)vvmcs->vmcs02) |= VMCS_SHADOW_BIT_INDICATOR;
}

/*
 * @pre vcpu != NULL
 * @pre vmcs01 is current
 */
static void clear_vmcs02_shadow_indicator(struct acrn_vvmcs *vvmcs)
{
	/* vmcs02 is an ordinary VMCS */
	*((uint32_t*)vvmcs->vmcs02) &= ~VMCS_SHADOW_BIT_INDICATOR;
}

/*
 * @pre vcpu != NULL
 * @pre vmcs01 is current
 */
static void enable_vmcs_shadowing(struct acrn_vvmcs *vvmcs)
{
	uint32_t val32;

	/*
	 * This method of using the same bitmap for VMREAD and VMWRITE is not typical.
	 * Here we assume the L1 hypervisor will not erroneously write to Read-Only fields.
	 * TODO: may use a different bitmap to exclude read-only fields from the VMWRITE bitmap.
	 */
	exec_vmwrite(VMX_VMREAD_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));
	exec_vmwrite(VMX_VMWRITE_BITMAP_FULL, hva2hpa(vmcs_shadowing_bitmap));

	/* Set VMCS shadowing bit in Secondary Proc Exec Controls */
	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
	val32 |= VMX_PROCBASED_CTLS2_VMCS_SHADW;
	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);

	/* Set VMCS Link pointer */
	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, hva2hpa(vvmcs->vmcs02));
}

/*
 * @pre vcpu != NULL
 * @pre vmcs01 is current
 */
static void disable_vmcs_shadowing(void)
{
	uint32_t val32;

	/* clear VMCS shadowing bit in Secondary Proc Exec Controls */
	val32 = exec_vmread(VMX_PROC_VM_EXEC_CONTROLS2);
	val32 &= ~VMX_PROCBASED_CTLS2_VMCS_SHADW;
	exec_vmwrite32(VMX_PROC_VM_EXEC_CONTROLS2, val32);

	exec_vmwrite(VMX_VMS_LINK_PTR_FULL, ~0UL);
}

/*
 * @pre vcpu != NULL
 * @pre vmcs01 is current
 */
static void clear_vvmcs(struct acrn_vcpu *vcpu, struct acrn_vvmcs *vvmcs)
{
	/*
	 * Now VMCS02 is active and being used as a shadow VMCS.
	 * Disable VMCS shadowing so that VMCS02 is not loaded by VMPTRLD and
	 * referenced by VMCS01 as a shadow VMCS at the same time.
	 */
	disable_vmcs_shadowing();

	/* Flush shadow VMCS to memory */
	clear_va_vmcs(vvmcs->vmcs02);

	/* VMPTRLD the shadow VMCS so that we are able to sync it to VMCS12 */
	load_va_vmcs(vvmcs->vmcs02);

	sync_vmcs02_to_vmcs12(&vvmcs->vmcs12);

	/* flush cached VMCS12 back to L1 guest */
	(void)copy_to_gpa(vcpu->vm, (void *)&vvmcs->vmcs12, vvmcs->vmcs12_gpa, sizeof(struct acrn_vmcs12));

	/*
	 * The current VMCS12 has been flushed out, so the active VMCS02
	 * needs to be VMCLEARed as well.
	 */
	clear_va_vmcs(vvmcs->vmcs02);

	/* This VMCS can no longer refer to any shadow EPT */
	put_vept_desc(vvmcs->vmcs12.ept_pointer);

	/* This vvmcs[] entry doesn't cache a VMCS12 any more */
	vvmcs->vmcs12_gpa = INVALID_GPA;

	/* Cleanup per VVMCS dirty flags */
	vvmcs->host_state_dirty = false;
	vvmcs->control_fields_dirty = false;
}

/*
 * @pre vcpu != NULL
 */
int32_t vmptrld_vmexit_handler(struct acrn_vcpu *vcpu)
{
	struct acrn_nested *nested = &vcpu->arch.nested;
	struct acrn_vvmcs *vvmcs;
	uint64_t vmcs12_gpa;

	if (check_vmx_permission(vcpu)) {
		vmcs12_gpa = get_vmptr_gpa(vcpu);

		if (!validate_vmptr_gpa(vmcs12_gpa)) {
			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INVALID_ADDRESS);
		} else if (vmcs12_gpa == nested->vmxon_ptr) {
			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_VMXON_POINTER);
		} else if (!validate_vmcs_revision_id(vcpu, vmcs12_gpa)) {
			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
		} else if ((nested->current_vvmcs != NULL) && (nested->current_vvmcs->vmcs12_gpa == vmcs12_gpa)) {
			/* VMPTRLD current VMCS12, do nothing */
			nested_vmx_result(VMsucceed, 0);
		} else {
			vvmcs = lookup_vvmcs(vcpu, vmcs12_gpa);
			if (vvmcs == NULL) {
				vvmcs = get_or_replace_vvmcs_entry(vcpu);

				/* Create the VMCS02 based on this new VMCS12 */

				/*
				 * initialize VMCS02
				 * The VMCS revision ID must equal what is reported by the IA32_VMX_BASIC MSR.
				 */
				(void)memcpy_s(vvmcs->vmcs02, 4U, (void *)&vmx_basic, 4U);

				/* VMPTRLD VMCS02 so that we can VMWRITE to it */
				load_va_vmcs(vvmcs->vmcs02);
				init_host_state();

				/* Load VMCS12 from L1 guest memory */
				(void)copy_from_gpa(vcpu->vm, (void *)&vvmcs->vmcs12, vmcs12_gpa,
					sizeof(struct acrn_vmcs12));

				/* if needed, create nept_desc and allocate shadow root for the EPTP */
				get_vept_desc(vvmcs->vmcs12.ept_pointer);

				/* Need to load shadow fields from this new VMCS12 to VMCS02 */
				sync_vmcs12_to_vmcs02(vcpu, &vvmcs->vmcs12);
			} else {
				vvmcs->ref_cnt += 1U;
			}

			/* Before VMCS02 is used as a shadow VMCS, VMCLEAR it */
			clear_va_vmcs(vvmcs->vmcs02);

			/*
			 * Now VMCS02 is not active, set the shadow-VMCS indicator.
			 * At L1 VM entry, VMCS02 will be referenced as a shadow VMCS.
			 */
			set_vmcs02_shadow_indicator(vvmcs);

			/* Switch back to vmcs01 */
			load_va_vmcs(vcpu->arch.vmcs);

			/* VMCS02 is referenced by VMCS01 Link Pointer */
			enable_vmcs_shadowing(vvmcs);

			vvmcs->vmcs12_gpa = vmcs12_gpa;
			nested->current_vvmcs = vvmcs;
			nested_vmx_result(VMsucceed, 0);
		}
	}

	return 0;
}

/*
 * @pre vcpu != NULL
 */
int32_t vmclear_vmexit_handler(struct acrn_vcpu *vcpu)
{
	struct acrn_nested *nested = &vcpu->arch.nested;
	struct acrn_vvmcs *vvmcs;
	uint64_t vmcs12_gpa;

	if (check_vmx_permission(vcpu)) {
		vmcs12_gpa = get_vmptr_gpa(vcpu);

		if (!validate_vmptr_gpa(vmcs12_gpa)) {
			nested_vmx_result(VMfailValid, VMXERR_VMPTRLD_INVALID_ADDRESS);
		} else if (vmcs12_gpa == nested->vmxon_ptr) {
			nested_vmx_result(VMfailValid, VMXERR_VMCLEAR_VMXON_POINTER);
		} else {
			vvmcs = lookup_vvmcs(vcpu, vmcs12_gpa);
			if (vvmcs != NULL) {
				uint64_t current_vmcs12_gpa = INVALID_GPA;

				/* Save for comparison */
				if (nested->current_vvmcs) {
					current_vmcs12_gpa = nested->current_vvmcs->vmcs12_gpa;
				}

				/* VMCLEAR an active VMCS12, may or may not be current */
				vvmcs->vmcs12.launch_state = VMCS12_LAUNCH_STATE_CLEAR;
				clear_vvmcs(vcpu, vvmcs);

				/* Switch back to vmcs01 (no VMCS shadowing) */
				load_va_vmcs(vcpu->arch.vmcs);

				if (current_vmcs12_gpa != INVALID_GPA) {
					if (current_vmcs12_gpa == vmcs12_gpa) {
						/* VMCLEAR current VMCS12 */
						nested->current_vvmcs = NULL;
					} else {
						/*
						 * VMCLEAR an active but not current VMCS12.
						 * VMCS shadowing was cleared earlier in clear_vvmcs()
						 */
						enable_vmcs_shadowing(nested->current_vvmcs);
					}
				} else {
					/* do nothing if there is no current VMCS12 */
				}
			} else {
				/*
				 * We need to update the VMCS12 launch state in L1 memory in these two cases:
				 * - the L1 hypervisor VMCLEARs a VMCS12 that was already flushed by ACRN to L1 guest memory
				 * - the L1 hypervisor VMCLEARs a VMCS12 that was never VMPTRLDed.
				 */
				uint32_t launch_state = VMCS12_LAUNCH_STATE_CLEAR;
				(void)copy_to_gpa(vcpu->vm, &launch_state, vmcs12_gpa +
					offsetof(struct acrn_vmcs12, launch_state), sizeof(launch_state));
			}

			nested_vmx_result(VMsucceed, 0);
		}
	}

	return 0;
}

/*
 * @pre vcpu != NULL
 */
bool is_vcpu_in_l2_guest(struct acrn_vcpu *vcpu)
{
	return vcpu->arch.nested.in_l2_guest;
}

/*
 * @pre seg != NULL
 */
static void set_segment(struct segment_sel *seg, uint16_t sel, uint64_t b, uint32_t l, uint32_t a)
{
	seg->selector = sel;
	seg->base = b;
	seg->limit = l;
	seg->attr = a;
}

/*
 * @pre vcpu != NULL
 * @pre vmcs01 is current
 */
static void set_vmcs01_guest_state(struct acrn_vcpu *vcpu)
{
	/*
	 * None of the host fields are shadowed, so all VMWRITEs to these fields
	 * are saved in vmcs12.
	 *
	 * Load host state from vmcs12 to vmcs01 guest state before entering
	 * L1 to emulate a VM exit from L2 to L1.
	 *
	 * We assume the host only changes these host-state fields at run time.
	 *
	 * Section 27.5 Loading Host State
	 * 1. Load Control Registers, Debug Registers, MSRs
	 * 2. Load RSP/RIP/RFLAGS
	 * 3. Load Segmentation State
	 * 4. Non-Register state
	 */
	struct acrn_vmcs12 *vmcs12 = &vcpu->arch.nested.current_vvmcs->vmcs12;
	struct segment_sel seg;

	if (vcpu->arch.nested.current_vvmcs->host_state_dirty == true) {
		vcpu->arch.nested.current_vvmcs->host_state_dirty = false;

		/*
		 * We want vcpu_get_cr0/4() to return the up-to-date values, but we don't
		 * want to call vcpu_set_cr0/4() to handle the CR0/4 write.
		 */
		exec_vmwrite(VMX_GUEST_CR0, vmcs12->host_cr0);
		exec_vmwrite(VMX_GUEST_CR4, vmcs12->host_cr4);
		bitmap_clear_nolock(CPU_REG_CR0, &vcpu->reg_cached);
		bitmap_clear_nolock(CPU_REG_CR4, &vcpu->reg_cached);

		exec_vmwrite(VMX_GUEST_CR3, vmcs12->host_cr3);
		exec_vmwrite(VMX_GUEST_DR7, DR7_INIT_VALUE);
		exec_vmwrite64(VMX_GUEST_IA32_DEBUGCTL_FULL, 0UL);
		exec_vmwrite32(VMX_GUEST_IA32_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
		exec_vmwrite(VMX_GUEST_IA32_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
		exec_vmwrite(VMX_GUEST_IA32_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);

		exec_vmwrite(VMX_GUEST_IA32_EFER_FULL, vmcs12->host_ia32_efer);

		/*
		 * type: 11 (Execute/Read, accessed)
		 * l: 64-bit mode active
		 */
		set_segment(&seg, vmcs12->host_cs, 0UL, 0xFFFFFFFFU, 0xa09bU);
		load_segment(seg, VMX_GUEST_CS);

		/*
		 * type: 3 (Read/Write, accessed)
		 * D/B: 1 (32-bit segment)
		 */
		set_segment(&seg, vmcs12->host_ds, 0UL, 0xFFFFFFFFU, 0xc093);
		load_segment(seg, VMX_GUEST_DS);

		seg.selector = vmcs12->host_ss;
		load_segment(seg, VMX_GUEST_SS);

		seg.selector = vmcs12->host_es;
		load_segment(seg, VMX_GUEST_ES);

		seg.selector = vmcs12->host_fs;
		seg.base = vmcs12->host_fs_base;
		load_segment(seg, VMX_GUEST_FS);

		seg.selector = vmcs12->host_gs;
		seg.base = vmcs12->host_gs_base;
		load_segment(seg, VMX_GUEST_GS);

		/*
		 * ISDM 27.5.2: segment limit for TR is set to 67H.
		 * Type set to 11 and S set to 0 (busy 32-bit task-state segment).
		 */
		set_segment(&seg, vmcs12->host_tr, vmcs12->host_tr_base, 0x67U, TR_AR);
		load_segment(seg, VMX_GUEST_TR);

		/*
		 * ISDM 27.5.2: LDTR is established as follows on all VM exits:
		 * the selector is cleared to 0000H, the segment is marked unusable
		 * and is otherwise undefined (although the base address is always canonical).
		 */
		exec_vmwrite16(VMX_GUEST_LDTR_SEL, 0U);
		exec_vmwrite32(VMX_GUEST_LDTR_ATTR, 0x10000U);
	}

	/*
	 * For those registers that are managed by the vcpu->reg_updated flag,
	 * we need to write them with vcpu_set_xxx() so that vcpu_get_xxx() can
	 * return the correct values.
	 */
	vcpu_set_rip(vcpu, vmcs12->host_rip);
	vcpu_set_rsp(vcpu, vmcs12->host_rsp);
	vcpu_set_rflags(vcpu, 0x2U);
}

/**
 * @pre vcpu != NULL
 */
static void sanitize_l2_vpid(struct acrn_vmcs12 *vmcs12)
{
	/* Flush VPID if the L2 VPID could conflict with any L1 VPIDs */
	if (vmcs12->vpid >= ALLOCATED_MIN_L1_VPID) {
		flush_vpid_single(vmcs12->vpid);
	}
}

/**
 * @brief handler for all VMEXITs from nested guests
 *
 * @pre vcpu != NULL
 * @pre VMCS02 (as an ordinary VMCS) is current
 */
int32_t nested_vmexit_handler(struct acrn_vcpu *vcpu)
{
	struct acrn_vvmcs *cur_vvmcs = vcpu->arch.nested.current_vvmcs;
	bool is_l1_vmexit = true;

	if ((vcpu->arch.exit_reason & 0xFFFFU) == VMX_EXIT_REASON_EPT_VIOLATION) {
		is_l1_vmexit = handle_l2_ept_violation(vcpu);
	}

	if (is_l1_vmexit) {
		sanitize_l2_vpid(&cur_vvmcs->vmcs12);

		/*
		 * Clear VMCS02 because: ISDM: Before modifying the shadow-VMCS indicator,
		 * software should execute VMCLEAR for the VMCS to ensure that it is not active.
		 */
		clear_va_vmcs(cur_vvmcs->vmcs02);
		set_vmcs02_shadow_indicator(cur_vvmcs);

		/* Switch to VMCS01, and VMCS02 is referenced as a shadow VMCS */
		load_va_vmcs(vcpu->arch.vmcs);

		/* Load host state from VMCS12 host area to Guest state of VMCS01 */
		set_vmcs01_guest_state(vcpu);

		/* vCPU is NOT in guest mode from this point */
		vcpu->arch.nested.in_l2_guest = false;
	}

	/*
	 * For VM exits that reflect to the L1 hypervisor, ACRN can't advance to the next
	 * guest RIP; that decision is up to the L1 hypervisor.
	 *
	 * The only case that doesn't need to be reflected is an EPT violation that can be
	 * completely handled by ACRN, which requires the L2 VM to re-execute the instruction
	 * after the shadow EPT has been properly set up.
	 *
	 * In either case, vcpu->arch.inst_len needs to be set to zero.
	 */
	vcpu_retain_rip(vcpu);
	return 0;
}

/*
 * @pre vcpu != NULL
 * @pre VMCS01 is current and VMCS02 is referenced by VMCS Link Pointer
 */
static void nested_vmentry(struct acrn_vcpu *vcpu, bool is_launch)
{
	struct acrn_vvmcs *cur_vvmcs = vcpu->arch.nested.current_vvmcs;
	struct acrn_vmcs12 *vmcs12 = &cur_vvmcs->vmcs12;

	if ((cur_vvmcs == NULL) || (cur_vvmcs->vmcs12_gpa == INVALID_GPA)) {
		nested_vmx_result(VMfailInvalid, 0);
	} else if (is_launch && (vmcs12->launch_state != VMCS12_LAUNCH_STATE_CLEAR)) {
		nested_vmx_result(VMfailValid, VMXERR_VMLAUNCH_NONCLEAR_VMCS);
	} else if (!is_launch && (vmcs12->launch_state != VMCS12_LAUNCH_STATE_LAUNCHED)) {
		nested_vmx_result(VMfailValid, VMXERR_VMRESUME_NONLAUNCHED_VMCS);
	} else {
		/*
		 * TODO: Need to do VM-Entry checks before L2 VM entry.
		 * Refer to ISDM Vol3 VMX Instructions reference.
		 */

		/*
		 * Convert the shadow VMCS to an ordinary VMCS.
		 * ISDM: Software should not modify the shadow-VMCS indicator in
		 * the VMCS region of a VMCS that is active.
		 */
		clear_va_vmcs(cur_vvmcs->vmcs02);
		clear_vmcs02_shadow_indicator(cur_vvmcs);

		/* as an ordinary VMCS, VMCS02 is active and current when the L2 guest is running */
		load_va_vmcs(cur_vvmcs->vmcs02);

		if (cur_vvmcs->control_fields_dirty) {
			cur_vvmcs->control_fields_dirty = false;
			merge_and_sync_control_fields(vcpu, vmcs12);
		}

		/* vCPU is in guest mode from this point */
		vcpu->arch.nested.in_l2_guest = true;

		if (is_launch) {
			vmcs12->launch_state = VMCS12_LAUNCH_STATE_LAUNCHED;
		}

		sanitize_l2_vpid(vmcs12);

		/*
		 * set vcpu->launched to false because the launch state of VMCS02 is
		 * clear at this moment, even for VMRESUME
		 */
		vcpu->launched = false;
	}
}

/*
 * @pre vcpu != NULL
 */
int32_t vmresume_vmexit_handler(struct acrn_vcpu *vcpu)
{
	if (check_vmx_permission(vcpu)) {
		nested_vmentry(vcpu, false);
	}

	return 0;
}

/*
 * @pre vcpu != NULL
 */
int32_t vmlaunch_vmexit_handler(struct acrn_vcpu *vcpu)
{
	if (check_vmx_permission(vcpu)) {
		nested_vmentry(vcpu, true);
	}

	return 0;
}

/*
 * @pre vcpu != NULL
 * @pre desc != NULL
 */
int64_t get_invvpid_ept_operands(struct acrn_vcpu *vcpu, void *desc, size_t size)
{
	const uint32_t info = exec_vmread(VMX_INSTR_INFO);
	uint64_t gpa;

	gpa = get_vmx_memory_operand(vcpu, info);
	(void)copy_from_gpa(vcpu->vm, desc, gpa, size);

	return vcpu_get_gpreg(vcpu, VMX_II_REG2(info));
}

/*
 * @pre vcpu != NULL
 */
static bool validate_canonical_addr(struct acrn_vcpu *vcpu, uint64_t va)
{
	uint32_t addr_width = 48U; /* linear address width */
	uint64_t msb_mask;

	if (vcpu_get_cr4(vcpu) & CR4_LA57) {
		addr_width = 57U;
	}

	/*
	 * In 64-bit mode, an address is considered to be in canonical form if address
	 * bits 63 down to the most-significant bit implemented by the microarchitecture
	 * are set to either all ones or all zeros.
	 */

	msb_mask = ~((1UL << addr_width) - 1UL);
	return ((msb_mask & va) == 0UL) || ((msb_mask & va) == msb_mask);
}
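
/*
 * Illustrative sketch with 48-bit linear addresses (CR4.LA57 clear):
 * msb_mask = ~((1UL << 48) - 1) = 0xFFFF000000000000.
 *   - 0x00007F1234567890 -> masked bits are all zeros -> accepted.
 *   - 0xFFFF8000DEADBEEF -> masked bits are all ones  -> accepted.
 *   - 0x0001000000000000 -> masked bits are mixed     -> rejected.
 */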

/*
 * @pre vcpu != NULL
 */
int32_t invvpid_vmexit_handler(struct acrn_vcpu *vcpu)
{
	uint32_t supported_types = (vcpu_get_guest_msr(vcpu, MSR_IA32_VMX_EPT_VPID_CAP) >> 40U) & 0xfU;
	struct invvpid_operand desc;
	uint64_t type;

	if (check_vmx_permission(vcpu)) {
		type = get_invvpid_ept_operands(vcpu, (void *)&desc, sizeof(desc));

		if ((type > VMX_VPID_TYPE_SINGLE_NON_GLOBAL) || ((supported_types & (1U << type)) == 0)) {
			nested_vmx_result(VMfailValid, VMXERR_INVEPT_INVVPID_INVALID_OPERAND);
		} else if ((desc.rsvd1 != 0U) || (desc.rsvd2 != 0U)) {
			nested_vmx_result(VMfailValid, VMXERR_INVEPT_INVVPID_INVALID_OPERAND);
		} else if ((type != VMX_VPID_TYPE_ALL_CONTEXT) && (desc.vpid == 0U)) {
			/* check VPID for type 0, 1, 3 */
			nested_vmx_result(VMfailValid, VMXERR_INVEPT_INVVPID_INVALID_OPERAND);
		} else if ((type == VMX_VPID_TYPE_INDIVIDUAL_ADDR) && !validate_canonical_addr(vcpu, desc.gva)) {
			nested_vmx_result(VMfailValid, VMXERR_INVEPT_INVVPID_INVALID_OPERAND);
		} else {
			/*
			 * VPIDs are pass-thru. Values programmed by L1 are used by L0.
			 * INVVPID type, VPID and GLA, operands of the INVVPID instruction, are
			 * passed as is to the pCPU.
			 */
			asm_invvpid(desc, type);
			nested_vmx_result(VMsucceed, 0);
		}
	}

	return 0;
}

void init_nested_vmx(__unused struct acrn_vm *vm)
{
	static bool initialized = false;

	if (!initialized) {
		initialized = true;

		/* Cache the value of physical MSR_IA32_VMX_BASIC */
		vmx_basic = (uint32_t)msr_read(MSR_IA32_VMX_BASIC);
		setup_vmcs_shadowing_bitmap();
	}
}