From 2c5c8754dec57bb3866467c0e982c59aa3a0ba1f Mon Sep 17 00:00:00 2001 From: "Liu,Junming" Date: Thu, 12 Aug 2021 09:30:05 +0000 Subject: [PATCH] hv:enable GVT-d for pre-launched linux guest in logical partition mode When passing through a GPU to a pre-launched Linux guest, the GPU OpRegion needs to be passed to the guest as well. Here are the detailed steps: 1. reserve a memory region in ve820 table for GPU OpRegion 2. build EPT mapping for GPU OpRegion to pass-thru OpRegion to guest 3. emulate the pci config register for OpRegion For the third step, here is the detailed description: The address of the OpRegion is located at PCI config space offset 0xFC. A normal Linux guest won't write this register, so we can regard this register as read-only. When the guest reads this register, return the emulated value. When the guest writes this register, ignore the operation. Tracked-On: #6387 Signed-off-by: Liu,Junming --- hypervisor/arch/x86/guest/ve820.c | 14 ++++++++++---- hypervisor/dm/vpci/pci_pt.c | 27 +++++++++++++++++++++++++++ hypervisor/dm/vpci/vpci.c | 9 +++++++-- hypervisor/include/common/ptdev.h | 5 +++++ 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/hypervisor/arch/x86/guest/ve820.c b/hypervisor/arch/x86/guest/ve820.c index 8de17d519..4e8dc5581 100644 --- a/hypervisor/arch/x86/guest/ve820.c +++ b/hypervisor/arch/x86/guest/ve820.c @@ -12,10 +12,11 @@ #include #include #include +#include #define ENTRY_HPA1_LOW_PART1 2U -#define ENTRY_HPA1_LOW_PART2 4U -#define ENTRY_HPA1_HI 8U +#define ENTRY_HPA1_LOW_PART2 5U +#define ENTRY_HPA1_HI 9U static struct e820_entry sos_vm_e820[E820_MAX_ENTRIES]; static struct e820_entry pre_vm_e820[PRE_VM_NUM][E820_MAX_ENTRIES]; @@ -197,9 +198,14 @@ static const struct e820_entry pre_ve820_template[E820_MAX_ENTRIES] = { .length = PRE_RTVM_SW_SRAM_MAX_SIZE, .type = E820_TYPE_RESERVED }, + { /* GPU OpRegion for pre-launched VM */ + .baseaddr = GPU_OPREGION_GPA, + .length = GPU_OPREGION_SIZE, + .type = E820_TYPE_RESERVED + }, { /* part2 of lowmem of hpa1*/ - .baseaddr = 
PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE, - .length = VIRT_ACPI_DATA_ADDR - (PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE), + .baseaddr = GPU_OPREGION_GPA + GPU_OPREGION_SIZE, + .length = VIRT_ACPI_DATA_ADDR - (GPU_OPREGION_GPA + GPU_OPREGION_SIZE), .type = E820_TYPE_RAM }, { /* ACPI Reclaim */ diff --git a/hypervisor/dm/vpci/pci_pt.c b/hypervisor/dm/vpci/pci_pt.c index 01b9d2fb4..ab3c9ed68 100644 --- a/hypervisor/dm/vpci/pci_pt.c +++ b/hypervisor/dm/vpci/pci_pt.c @@ -35,6 +35,7 @@ #include #include #include +#include #include "vpci_priv.h" /** @@ -487,6 +488,28 @@ void vdev_pt_hide_sriov_cap(struct pci_vdev *vdev) pr_acrnlog("Hide sriov cap for %02x:%02x.%x", vdev->pdev->bdf.bits.b, vdev->pdev->bdf.bits.d, vdev->pdev->bdf.bits.f); } + +/* Pass the host GPU OpRegion through to the guest: map it into the guest at GPU_OPREGION_GPA and expose that address through the virtual PCIR_ASLS_CTL register. + * TODO: The OpRegion is not 4KB aligned, and on some platforms + * it takes up to 16KB. In that case the OpRegion overlaps 5 pages, + * so GPU_OPREGION_SIZE is set to 0x5000U (20KB) here. + * + * Passing the OpRegion through to the guest has a potential security issue. + * A copy + emulation solution will be used later to expose the host OpRegion to the guest. + */ +void passthru_gpu_opregion(struct pci_vdev *vdev) +{ + uint32_t gpu_opregion_hpa, gpu_opregion_gpa, gpu_asls_phys; + + gpu_opregion_gpa = GPU_OPREGION_GPA; + gpu_asls_phys = pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U); /* ASLS value of the physical device */ + gpu_opregion_hpa = gpu_asls_phys & PCIM_ASLS_OPREGION_MASK; /* 4KB-aligned host address; the low bits are dropped */ + ept_add_mr(vpci2vm(vdev->vpci), vpci2vm(vdev->vpci)->arch_vm.nworld_eptp, + gpu_opregion_hpa, gpu_opregion_gpa, + GPU_OPREGION_SIZE, EPT_RD | EPT_UNCACHED); + pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, gpu_opregion_gpa | (gpu_asls_phys & ~PCIM_ASLS_OPREGION_MASK)); /* virtual ASLS = guest address plus the low bits of the physical value */ +} + /* * @brief Initialize a specified passthrough vdev structure. 
* @@ -523,6 +546,10 @@ void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev) /* Disable INTX */ pci_command |= 0x400U; pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command); + + if (vdev->pdev->bdf.value == CONFIG_GPU_SBDF) { + passthru_gpu_opregion(vdev); + } } } else { if (vdev->phyfun->vpci != vdev->vpci) { diff --git a/hypervisor/dm/vpci/vpci.c b/hypervisor/dm/vpci/vpci.c index 73b7fcb9e..c1f813895 100644 --- a/hypervisor/dm/vpci/vpci.c +++ b/hypervisor/dm/vpci/vpci.c @@ -516,8 +516,10 @@ static int32_t write_pt_dev_cfg(struct pci_vdev *vdev, uint32_t offset, } else { if (offset != vdev->pdev->sriov.pre_pos) { if (!is_quirk_ptdev(vdev)) { - /* passthru to physical device */ - pci_pdev_write_cfg(vdev->pdev->bdf, offset, bytes, val); + if ((vdev->pdev->bdf.value != CONFIG_GPU_SBDF) || (offset != PCIR_ASLS_CTL)) { + /* passthru to physical device */ + pci_pdev_write_cfg(vdev->pdev->bdf, offset, bytes, val); + } } else { ret = -ENODEV; } @@ -544,6 +546,9 @@ static int32_t read_pt_dev_cfg(const struct pci_vdev *vdev, uint32_t offset, } else if (!is_quirk_ptdev(vdev)) { /* passthru to physical device */ *val = pci_pdev_read_cfg(vdev->pdev->bdf, offset, bytes); + if ((vdev->pdev->bdf.value == CONFIG_GPU_SBDF) && (offset == PCIR_ASLS_CTL)) { + *val = pci_vdev_read_vcfg(vdev, offset, bytes); + } } else { ret = -ENODEV; } diff --git a/hypervisor/include/common/ptdev.h b/hypervisor/include/common/ptdev.h index 2bc8d1146..f586e20bd 100644 --- a/hypervisor/include/common/ptdev.h +++ b/hypervisor/include/common/ptdev.h @@ -19,6 +19,11 @@ enum intx_ctlr { #define PTDEV_INTR_MSI (1U << 0U) #define PTDEV_INTR_INTX (1U << 1U) +#define GPU_OPREGION_SIZE 0x5000U +#define GPU_OPREGION_GPA 0x40880000U +#define PCIR_ASLS_CTL 0xfcU /* register offset in PCIe configuration space for Opregion base address */ +#define PCIM_ASLS_OPREGION_MASK 0xfffff000U /* opregion need 4KB aligned */ + #define INVALID_PTDEV_ENTRY_ID 0xffffU #define DEFINE_MSI_SID(name, a, b) \