hv: enable GVT-d for pre-launched Linux guest in logical partition mode

When passing a GPU through to a pre-launched Linux guest, the GPU OpRegion must also be passed to the guest. Here are the detailed steps: 1. reserve a memory region in the ve820 table for the GPU OpRegion 2. build an EPT mapping for the GPU OpRegion to pass the OpRegion through to the guest 3. emulate the PCI config register for the OpRegion. For the third step, here is the detailed description: The address of the OpRegion is located at PCI config space offset 0xFC. A normal Linux guest won't write this register, so we can regard this register as read-only. When the guest reads this register, return the emulated value. When the guest writes this register, ignore the operation. Tracked-On: #6387 Signed-off-by: Liu,Junming <junming.liu@intel.com>
2021-08-12 09:30:05 +00:00 · 2021-08-12 09:30:05 +00:00 · 2c5c8754de
parent 1dca1da96d
commit 2c5c8754de
4 changed files with 49 additions and 6 deletions
--- a/hypervisor/arch/x86/guest/ve820.c
+++ b/hypervisor/arch/x86/guest/ve820.c
@ -12,10 +12,11 @@
 #include <vacpi.h>
 #include <logmsg.h>
 #include <asm/rtcm.h>
 #include <ptdev.h>
 #define ENTRY_HPA1_LOW_PART1	2U
-#define ENTRY_HPA1_LOW_PART2	4U
+#define ENTRY_HPA1_LOW_PART2	5U
-#define ENTRY_HPA1_HI		8U
+#define ENTRY_HPA1_HI		9U
 static struct e820_entry sos_vm_e820[E820_MAX_ENTRIES];
 static struct e820_entry pre_vm_e820[PRE_VM_NUM][E820_MAX_ENTRIES];
@ -197,9 +198,14 @@ static const struct e820_entry pre_ve820_template[E820_MAX_ENTRIES] = {
 		.length   = PRE_RTVM_SW_SRAM_MAX_SIZE,
 		.type     = E820_TYPE_RESERVED
 	},
 	{	/* GPU OpRegion for pre-launched VM */
 		.baseaddr = GPU_OPREGION_GPA,
 		.length   = GPU_OPREGION_SIZE,
 		.type     = E820_TYPE_RESERVED
 	},
 	{	/* part2 of lowmem of hpa1*/
-		.baseaddr = PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE,
+		.baseaddr = GPU_OPREGION_GPA + GPU_OPREGION_SIZE,
-		.length   = VIRT_ACPI_DATA_ADDR - (PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE),
+		.length   = VIRT_ACPI_DATA_ADDR - (GPU_OPREGION_GPA + GPU_OPREGION_SIZE),
 		.type     = E820_TYPE_RAM
 	},
 	{	/* ACPI Reclaim */
--- a/hypervisor/dm/vpci/pci_pt.c
+++ b/hypervisor/dm/vpci/pci_pt.c
@ -35,6 +35,7 @@
 #include <asm/mmu.h>
 #include <asm/io.h>
 #include <logmsg.h>
 #include <config.h>
 #include "vpci_priv.h"
 /**
@ -487,6 +488,28 @@ void vdev_pt_hide_sriov_cap(struct pci_vdev *vdev)
 	pr_acrnlog("Hide sriov cap for %02x:%02x.%x", vdev->pdev->bdf.bits.b, vdev->pdev->bdf.bits.d, vdev->pdev->bdf.bits.f);
 }
 /* TODO:
 * The OpRegion is not 4KB aligned, and on some platforms
 * it takes up to 16KB. In that case the OpRegion can span 5 pages,
 * so GPU_OPREGION_SIZE is set to 0x5000U (20KB) here.
 *
 * Passing the OpRegion through to the guest has a potential security issue.
 * A copy + emulation solution will be used later to expose the host OpRegion to the guest.
 */
 /*
 * @brief Map the physical GPU OpRegion into the guest and record the guest-visible
 *        value of the ASLS register in the virtual config space.
 *
 * Reads the 4-byte register at PCIR_ASLS_CTL from the physical device, masks it with
 * PCIM_ASLS_OPREGION_MASK to obtain the host-physical base, adds an EPT mapping of
 * GPU_OPREGION_SIZE bytes from that base to GPU_OPREGION_GPA, and then writes
 * GPU_OPREGION_GPA combined with the register bits outside the mask into the
 * virtual config space of the vdev.
 *
 * @param vdev Passthrough vdev of the GPU. vdev->pdev and vdev->vpci are
 *             dereferenced without any check in this function.
 *
 * NOTE(review): the value read from PCIR_ASLS_CTL is not checked before use; if the
 * register reads as 0 the mapping would start at host-physical address 0 - confirm
 * whether callers guarantee a valid OpRegion address.
 */
 void passthru_gpu_opregion(struct pci_vdev *vdev)
 {
 	uint32_t gpu_opregion_hpa, gpu_opregion_gpa, gpu_asls_phys;
 	/* Guest-physical address at which the OpRegion is exposed. */
 	gpu_opregion_gpa = GPU_OPREGION_GPA;
 	/* Raw register value taken from the physical device, not from the virtual config space. */
 	gpu_asls_phys = pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U);
 	/* Keep only the bits selected by the mask as the host-physical base of the mapping. */
 	gpu_opregion_hpa = gpu_asls_phys & PCIM_ASLS_OPREGION_MASK;
 	/* Only EPT_RD and EPT_UNCACHED are requested; no write or execute permission is passed. */
 	ept_add_mr(vpci2vm(vdev->vpci), vpci2vm(vdev->vpci)->arch_vm.nworld_eptp,
 			gpu_opregion_hpa, gpu_opregion_gpa,
 			GPU_OPREGION_SIZE, EPT_RD | EPT_UNCACHED);
 	/* Virtual register value: the GPA in the masked bits, the physical register's remaining low bits preserved. */
 	pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, gpu_opregion_gpa | (gpu_asls_phys & ~PCIM_ASLS_OPREGION_MASK));
 }
 /*
 * @brief Initialize a specified passthrough vdev structure.
 *
@ -523,6 +546,10 @@ void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev)
 			/* Disable INTX */
 			pci_command |= 0x400U;
 			pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);
 			if (vdev->pdev->bdf.value == CONFIG_GPU_SBDF) {
 				passthru_gpu_opregion(vdev);
 			}
 		}
 	} else {
 		if (vdev->phyfun->vpci != vdev->vpci) {
--- a/hypervisor/dm/vpci/vpci.c
+++ b/hypervisor/dm/vpci/vpci.c
@ -516,8 +516,10 @@ static int32_t write_pt_dev_cfg(struct pci_vdev *vdev, uint32_t offset,
 	} else {
 		if (offset != vdev->pdev->sriov.pre_pos) {
 			if (!is_quirk_ptdev(vdev)) {
 				if ((vdev->pdev->bdf.value != CONFIG_GPU_SBDF) || (offset != PCIR_ASLS_CTL)) {
 					/* passthru to physical device */
 					pci_pdev_write_cfg(vdev->pdev->bdf, offset, bytes, val);
 				}
 			} else {
 				ret = -ENODEV;
 			}
@ -544,6 +546,9 @@ static int32_t read_pt_dev_cfg(const struct pci_vdev *vdev, uint32_t offset,
 		} else if (!is_quirk_ptdev(vdev)) {
 			/* passthru to physical device */
 			*val = pci_pdev_read_cfg(vdev->pdev->bdf, offset, bytes);
 			if ((vdev->pdev->bdf.value == CONFIG_GPU_SBDF) && (offset == PCIR_ASLS_CTL)) {
 				*val = pci_vdev_read_vcfg(vdev, offset, bytes);
 			}
 		} else {
 			ret = -ENODEV;
 		}
--- a/hypervisor/include/common/ptdev.h
+++ b/hypervisor/include/common/ptdev.h
@ -19,6 +19,11 @@ enum intx_ctlr {
 #define PTDEV_INTR_MSI		(1U << 0U)
 #define PTDEV_INTR_INTX		(1U << 1U)
 /* Size in bytes of the range reserved and mapped for the GPU OpRegion (0x5000 = 20KB). */
 #define GPU_OPREGION_SIZE	0x5000U
 /* Guest-physical base address at which the GPU OpRegion is placed. */
 #define GPU_OPREGION_GPA	0x40880000U
 #define PCIR_ASLS_CTL		0xfcU /* offset in PCIe configuration space of the register holding the OpRegion base address */
 #define PCIM_ASLS_OPREGION_MASK	0xfffff000U /* the OpRegion address needs to be 4KB aligned; selects the upper 20 address bits */
 #define INVALID_PTDEV_ENTRY_ID 0xffffU
 #define DEFINE_MSI_SID(name, a, b)	\