hv: ept: disable execute right on large pages
Issue description:
------------------
Machine Check Error on Page Size Change

An instruction fetch may cause a machine check error if the page size and
memory type were changed without invalidation on some processors [1][2].
A malicious guest kernel could trigger this issue. The issue applies to both
the primary page table and the extended page tables (EPT); however, the
primary page table is controlled by the hypervisor only. This patch
mitigates the situation in EPT.

Mitigation details:
-------------------
Implement non-executable huge pages in EPT. This patch series clears the
execute permission (bit 2) in the EPT entries for large pages. When an EPT
violation is triggered by a guest instruction fetch, the hypervisor converts
the large page into smaller 4 KB pages, restores the execute permission, and
then re-executes the guest instruction.

The current patch turns on the mitigation by default. Follow-up patches will
conditionally turn the feature on or off per processor model.

[1] Refer to erratum KBL002 in "7th Generation Intel Processor Family and
    8th Generation Intel Processor Family for U Quad Core Platforms
    Specification Update":
    https://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/7th-gen-core-family-spec-update.pdf

[2] Refer to erratum SKL002 in "6th Generation Intel Processor Family
    Specification Update":
    https://www.intel.com/content/www/us/en/products/docs/processors/core/desktop-6th-gen-core-family-spec-update.html

Tracked-On: #4101
Signed-off-by: Binbin Wu <binbin.wu@intel.com>
Reviewed-by: Eddie Dong <eddie.dong@intel.com>
commit fa3888c12a
parent e9b035bab6
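As a quick illustration of the mechanism described above, here is a minimal standalone sketch (not ACRN code) of the two entry-level operations: dropping the execute permission, bit 2, when a large leaf entry is installed, and giving it back once the page has been split into 4 KB pages. The EPT_EXE and PAGE_PSE masks and the sample entry value are illustrative assumptions, not the ACRN definitions.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_EXE   (1UL << 2)   /* execute permission, bit 2 (per the message above) */
#define PAGE_PSE  (1UL << 7)   /* large-page flag, illustrative value */

/* Applied whenever a 2 MB / 1 GB leaf entry is written: drop the execute right. */
static void tweak_exe_right(uint64_t *entry)
{
	*entry &= ~EPT_EXE;
}

/* Applied when the large page is broken into 4 KB pages: restore the execute right. */
static void recover_exe_right(uint64_t *entry)
{
	*entry |= EPT_EXE;
}

int main(void)
{
	/* hypothetical RWX 2 MB entry: R/W/X in bits 0-2, PS flag set */
	uint64_t entry = 0x200000UL | 0x7UL | PAGE_PSE;

	tweak_exe_right(&entry);     /* guest code in this page now faults on fetch */
	printf("large-page entry: %#" PRIx64 "\n", entry);

	recover_exe_right(&entry);   /* done while breaking it into 4 KB pages */
	printf("after split:      %#" PRIx64 "\n", entry);
	return 0;
}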
@@ -14,6 +14,7 @@
#include <vmexit.h>
#include <vmx.h>
#include <ept.h>
#include <pgtable.h>
#include <trace.h>
#include <logmsg.h>
@@ -105,68 +106,81 @@ int32_t ept_violation_vmexit_handler(struct acrn_vcpu *vcpu)

	/* Handle page fault from guest */
	exit_qual = vcpu->arch.exit_qualification;

	io_req->io_type = REQ_MMIO;

	/* Specify if read or write operation */
	if ((exit_qual & 0x2UL) != 0UL) {
		/* Write operation */
		mmio_req->direction = REQUEST_WRITE;
		mmio_req->value = 0UL;

		/* XXX: write access while EPT perm RX -> WP */
		if ((exit_qual & 0x38UL) == 0x28UL) {
			io_req->io_type = REQ_WP;
		}
	} else {
		/* Read operation */
		mmio_req->direction = REQUEST_READ;

		/* TODO: Need to determine how sign extension is determined for
		 * reads
		 */
	}

	/* Get the guest physical address */
	gpa = exec_vmread64(VMX_GUEST_PHYSICAL_ADDR_FULL);

	TRACE_2L(TRACE_VMEXIT_EPT_VIOLATION, exit_qual, gpa);

	/* Adjust IPA appropriately and OR page offset to get full IPA of abort
	 */
	mmio_req->address = gpa;

	/* caused by instruction fetch */
	if ((exit_qual & 0x4UL) != 0UL) {
		if (vcpu->arch.cur_context == NORMAL_WORLD) {
			ept_modify_mr(vcpu->vm, (uint64_t *)vcpu->vm->arch_vm.nworld_eptp,
				gpa & PAGE_MASK, PAGE_SIZE, EPT_EXE, 0UL);
		} else {
			ept_modify_mr(vcpu->vm, (uint64_t *)vcpu->vm->arch_vm.sworld_eptp,
				gpa & PAGE_MASK, PAGE_SIZE, EPT_EXE, 0UL);
		}
		vcpu_retain_rip(vcpu);
		status = 0;
	} else {
		ret = decode_instruction(vcpu);
		if (ret > 0) {
			mmio_req->size = (uint64_t)ret;
			/*
			 * For MMIO write, ask DM to run MMIO emulation after
			 * instruction emulation. For MMIO read, ask DM to run MMIO
			 * emulation at first.
			 */

			/* Determine value being written. */
			if (mmio_req->direction == REQUEST_WRITE) {
				status = emulate_instruction(vcpu);
				if (status != 0) {
					ret = -EFAULT;
				}
			}

			if (ret > 0) {
				status = emulate_io(vcpu, io_req);
			}
		} else {
			if (ret == -EFAULT) {
				pr_info("page fault happen during decode_instruction");
				status = 0;
			}
		}

		if (ret <= 0) {
			pr_acrnlog("Guest Linear Address: 0x%016lx", exec_vmread(VMX_GUEST_LINEAR_ADDR));
			pr_acrnlog("Guest Physical Address address: 0x%016lx", gpa);
		}
	}

	return status;
}
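The constants tested in the handler above come from the EPT-violation exit qualification: bit 1 is a write access, bit 2 is an instruction fetch (the new mitigation path), and bits 5:3 carry the EPT permissions of the faulting translation, so a masked value of 0x28 means the page was readable and executable but not writable. A hedged, standalone decoder of those bits, with the layout taken from the Intel SDM; the helper names and sample values are illustrative, not ACRN APIs.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_write_access(uint64_t exit_qual)  { return (exit_qual & 0x2UL) != 0UL; }   /* bit 1 */
static bool is_instr_fetch(uint64_t exit_qual)   { return (exit_qual & 0x4UL) != 0UL; }   /* bit 2 */
/* bits 5:3 = EPT permissions of the page; 0x28 = executable + readable,
 * not writable, i.e. a write hit an RX page -> handled as a REQ_WP request.
 */
static bool is_write_to_rx_page(uint64_t exit_qual) { return (exit_qual & 0x38UL) == 0x28UL; }

int main(void)
{
	uint64_t samples[] = {
		0x1CUL,  /* fetch from an RW, non-executable large page */
		0x2AUL,  /* write to an RX page */
	};

	for (unsigned int i = 0U; i < sizeof(samples) / sizeof(samples[0]); i++) {
		uint64_t q = samples[i];
		printf("qual=%#lx write=%d fetch=%d write-to-RX=%d\n",
		       (unsigned long)q, is_write_access(q), is_instr_fetch(q),
		       is_write_to_rx_page(q));
	}
	return 0;
}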
@@ -60,6 +60,9 @@ static inline struct page *ppt_get_pd_page(const union pgtable_pages_info *info,
	return pd_page;
}

static inline void ppt_tweak_exe_right(uint64_t *entry __attribute__((unused))) {}
static inline void ppt_recover_exe_right(uint64_t *entry __attribute__((unused))) {}

const struct memory_ops ppt_mem_ops = {
	.info = &ppt_pages_info,
	.get_default_access_right = ppt_get_default_access_right,
@@ -68,6 +71,8 @@ const struct memory_ops ppt_mem_ops = {
	.get_pdpt_page = ppt_get_pdpt_page,
	.get_pd_page = ppt_get_pd_page,
	.clflush_pagewalk = ppt_clflush_pagewalk,
	.tweak_exe_right = ppt_tweak_exe_right,
	.recover_exe_right = ppt_recover_exe_right,
};

static struct page sos_vm_pml4_pages[PML4_PAGE_NUM(EPT_ADDRESS_SPACE(CONFIG_SOS_RAM_SIZE))];
@@ -164,6 +169,20 @@ static inline void *ept_get_sworld_memory_base(const union pgtable_pages_info *i
	return info->ept.sworld_memory_base;
}

/* The function is used to disable the execute right for large pages (2 MB / 1 GB) in EPT */
static inline void ept_tweak_exe_right(uint64_t *entry)
{
	*entry &= ~EPT_EXE;
}

/* The function is used to recover the execute right when large pages are broken into 4 KB pages.
 * The hypervisor doesn't control the execute right for guest memory, so it recovers the execute right by default.
 */
static inline void ept_recover_exe_right(uint64_t *entry)
{
	*entry |= EPT_EXE;
}

void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t vm_id)
{
	if (vm_id != 0U) {
@@ -185,4 +204,7 @@ void init_ept_mem_ops(struct memory_ops *mem_ops, uint16_t vm_id)
	mem_ops->get_pd_page = ept_get_pd_page;
	mem_ops->get_pt_page = ept_get_pt_page;
	mem_ops->clflush_pagewalk = ept_clflush_pagewalk;
	/* Mitigation for issue "Machine Check Error on Page Size Change" */
	mem_ops->tweak_exe_right = ept_tweak_exe_right;
	mem_ops->recover_exe_right = ept_recover_exe_right;
}
@@ -37,6 +37,7 @@ static void split_large_page(uint64_t *pte, enum _page_table_level level,
		paddrinc = PTE_SIZE;
		ref_prot = (*pte) & ~PDE_PFN_MASK;
		ref_prot &= ~PAGE_PSE;
		mem_ops->recover_exe_right(&ref_prot);
		pbase = (uint64_t *)mem_ops->get_pt_page(mem_ops->info, vaddr);
		break;
	}
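When a large mapping is split, every 4 KB child inherits the parent's attribute bits with the PS flag dropped, and the hunk above additionally runs recover_exe_right() so the children regain the execute right that the large page gave up. A hedged, standalone model of that derivation; the mask and size values are illustrative assumptions, not the ACRN definitions.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define PDE_PFN_MASK  0x000FFFFFFFE00000UL  /* 2 MB-aligned frame bits, illustrative */
#define PAGE_PSE      (1UL << 7)
#define EPT_EXE       (1UL << 2)
#define PTE_SIZE      0x1000UL

static void recover_exe_right(uint64_t *entry) { *entry |= EPT_EXE; }

int main(void)
{
	uint64_t pde = 0x40000000UL | PAGE_PSE | 0x3UL;  /* RW, non-exec 2 MB entry */
	uint64_t paddr = pde & PDE_PFN_MASK;

	/* attribute bits only: strip the frame address and the PS flag ... */
	uint64_t ref_prot = pde & ~PDE_PFN_MASK;
	ref_prot &= ~PAGE_PSE;
	/* ... and give the 4 KB children back the execute right */
	recover_exe_right(&ref_prot);

	for (int i = 0; i < 4; i++)   /* first 4 of the 512 child PTEs */
		printf("pte[%d] = %#" PRIx64 "\n", i,
		       (paddr + (uint64_t)i * PTE_SIZE) | ref_prot);
	return 0;
}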
@@ -298,6 +299,7 @@ static void add_pde(const uint64_t *pdpte, uint64_t paddr_start, uint64_t vaddr_
		if (mem_aligned_check(paddr, PDE_SIZE) &&
			mem_aligned_check(vaddr, PDE_SIZE) &&
			(vaddr_next <= vaddr_end)) {
			mem_ops->tweak_exe_right(&prot);
			set_pgentry(pde, paddr | (prot | PAGE_PSE), mem_ops);
			if (vaddr_next < vaddr_end) {
				paddr += (vaddr_next - vaddr);
@@ -344,6 +346,7 @@ static void add_pdpte(const uint64_t *pml4e, uint64_t paddr_start, uint64_t vadd
		if (mem_aligned_check(paddr, PDPTE_SIZE) &&
			mem_aligned_check(vaddr, PDPTE_SIZE) &&
			(vaddr_next <= vaddr_end)) {
			mem_ops->tweak_exe_right(&prot);
			set_pgentry(pdpte, paddr | (prot | PAGE_PSE), mem_ops);
			if (vaddr_next < vaddr_end) {
				paddr += (vaddr_next - vaddr);
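Both hunks above gate the large-page path on the same condition: the physical and virtual addresses must be aligned to the page size and the remaining range must cover a full page; only on that path is tweak_exe_right() applied before the PS entry is written, otherwise the walk descends to 4 KB pages, which keep the execute right. A hedged, standalone restatement of that check; the helper name use_large_page and the size constants are illustrative assumptions.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PDE_SIZE    (1UL << 21)   /* 2 MB */
#define PDPTE_SIZE  (1UL << 30)   /* 1 GB */

static bool mem_aligned_check(uint64_t value, uint64_t req_align)
{
	return (value & (req_align - 1UL)) == 0UL;
}

/* A large leaf entry may be used only if both addresses are aligned to the
 * page size and the next page boundary does not run past the end of the range.
 */
static bool use_large_page(uint64_t paddr, uint64_t vaddr,
			   uint64_t vaddr_end, uint64_t page_size)
{
	uint64_t vaddr_next = (vaddr & ~(page_size - 1UL)) + page_size;

	return mem_aligned_check(paddr, page_size) &&
	       mem_aligned_check(vaddr, page_size) &&
	       (vaddr_next <= vaddr_end);
}

int main(void)
{
	/* 2 MB-aligned, 4 MB long: large page is used, so the exec right is dropped */
	printf("%d\n", use_large_page(0x200000UL, 0x200000UL, 0x600000UL, PDE_SIZE));
	/* misaligned physical address: falls back to 4 KB pages, exec right kept */
	printf("%d\n", use_large_page(0x201000UL, 0x200000UL, 0x600000UL, PDE_SIZE));
	return 0;
}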
@@ -72,6 +72,8 @@ struct memory_ops {
	struct page *(*get_pt_page)(const union pgtable_pages_info *info, uint64_t gpa);
	void *(*get_sworld_memory_base)(const union pgtable_pages_info *info);
	void (*clflush_pagewalk)(const void *p);
	void (*tweak_exe_right)(uint64_t *entry);
	void (*recover_exe_right)(uint64_t *entry);
};

extern const struct memory_ops ppt_mem_ops;
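The two new members route the mitigation through the same memory_ops table that already separates EPT from the hypervisor's own (PPT) page tables, so the generic page-table walker stays identical for both: the EPT ops clear and restore the execute bit, while the PPT ops are empty stubs because the hypervisor's own mappings need no mitigation. A hedged, standalone sketch of that dispatch; the struct is reduced to the two new hooks and the values are simplified stand-ins for the ACRN ones.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EPT_EXE (1UL << 2)

struct memory_ops {
	void (*tweak_exe_right)(uint64_t *entry);
	void (*recover_exe_right)(uint64_t *entry);
};

/* PPT: the hypervisor's own page tables need no mitigation, so both hooks are no-ops. */
static void ppt_tweak_exe_right(uint64_t *entry)   { (void)entry; }
static void ppt_recover_exe_right(uint64_t *entry) { (void)entry; }

/* EPT: drop the execute right on large pages, restore it on split. */
static void ept_tweak_exe_right(uint64_t *entry)   { *entry &= ~EPT_EXE; }
static void ept_recover_exe_right(uint64_t *entry) { *entry |= EPT_EXE; }

static const struct memory_ops ppt_ops = { ppt_tweak_exe_right, ppt_recover_exe_right };
static const struct memory_ops ept_ops = { ept_tweak_exe_right, ept_recover_exe_right };

/* The generic page-table code only ever sees the ops table. */
static uint64_t install_large_page(const struct memory_ops *ops, uint64_t prot)
{
	ops->tweak_exe_right(&prot);
	return prot;
}

int main(void)
{
	printf("ppt prot: %#" PRIx64 "\n", install_large_page(&ppt_ops, 0x7UL)); /* unchanged */
	printf("ept prot: %#" PRIx64 "\n", install_large_page(&ept_ops, 0x7UL)); /* exec cleared */
	return 0;
}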