hv: enable GVT-d for pre-launched Linux guest in logical partition mode

When passing through the GPU to a pre-launched Linux guest,
the GPU OpRegion also needs to be passed to the guest.
The detailed steps are:
1. reserve a memory region in the ve820 table for the GPU OpRegion
2. build an EPT mapping for the GPU OpRegion, so the OpRegion is passed through to the guest
3. emulate the PCI config register that holds the OpRegion address
For the third step in more detail (sketched below):
the OpRegion address lives at PCI config space offset 0xFC.
A normal Linux guest never writes this register,
so it can be treated as read-only:
when the guest reads it, return the emulated value;
when the guest writes it, ignore the operation.
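
A minimal sketch of the intended step-3 behaviour (illustration only, not part
of the patch; asls_read/asls_write are hypothetical names, pci_vdev_read_vcfg
is the vPCI helper that also appears in the diff below):

    /* GPU OpRegion address register (ASLS) in PCI config space */
    #define PCIR_ASLS_CTL 0xfcU

    /* Read: return the emulated value, i.e. the guest GPA of the OpRegion. */
    static uint32_t asls_read(const struct pci_vdev *vdev, uint32_t bytes)
    {
        return pci_vdev_read_vcfg(vdev, PCIR_ASLS_CTL, bytes);
    }

    /* Write: a normal Linux guest never writes this register, so treat it as
     * read-only and silently drop the write instead of forwarding it to the
     * physical device. */
    static void asls_write(struct pci_vdev *vdev, uint32_t bytes, uint32_t val)
    {
        (void)vdev; (void)bytes; (void)val;
    }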

Tracked-On: #6387

Signed-off-by: Liu,Junming <junming.liu@intel.com>
Commit: 2c5c8754de (parent: 1dca1da96d)
Author: Liu,Junming, 2021-08-12 09:30:05 +00:00, committed by wenlingz
4 changed files with 49 additions and 6 deletions

@@ -12,10 +12,11 @@
 #include <vacpi.h>
 #include <logmsg.h>
 #include <asm/rtcm.h>
+#include <ptdev.h>
 
 #define ENTRY_HPA1_LOW_PART1 2U
-#define ENTRY_HPA1_LOW_PART2 4U
-#define ENTRY_HPA1_HI 8U
+#define ENTRY_HPA1_LOW_PART2 5U
+#define ENTRY_HPA1_HI 9U
 
 static struct e820_entry sos_vm_e820[E820_MAX_ENTRIES];
 static struct e820_entry pre_vm_e820[PRE_VM_NUM][E820_MAX_ENTRIES];
@@ -197,9 +198,14 @@ static const struct e820_entry pre_ve820_template[E820_MAX_ENTRIES] = {
 		.length = PRE_RTVM_SW_SRAM_MAX_SIZE,
 		.type = E820_TYPE_RESERVED
 	},
+	{	/* GPU OpRegion for pre-launched VM */
+		.baseaddr = GPU_OPREGION_GPA,
+		.length = GPU_OPREGION_SIZE,
+		.type = E820_TYPE_RESERVED
+	},
 	{	/* part2 of lowmem of hpa1*/
-		.baseaddr = PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE,
-		.length = VIRT_ACPI_DATA_ADDR - (PRE_RTVM_SW_SRAM_BASE_GPA + PRE_RTVM_SW_SRAM_MAX_SIZE),
+		.baseaddr = GPU_OPREGION_GPA + GPU_OPREGION_SIZE,
+		.length = VIRT_ACPI_DATA_ADDR - (GPU_OPREGION_GPA + GPU_OPREGION_SIZE),
 		.type = E820_TYPE_RAM
 	},
 	{	/* ACPI Reclaim */

@@ -35,6 +35,7 @@
 #include <asm/mmu.h>
 #include <asm/io.h>
 #include <logmsg.h>
+#include <config.h>
 #include "vpci_priv.h"
 
 /**
@@ -487,6 +488,28 @@ void vdev_pt_hide_sriov_cap(struct pci_vdev *vdev)
 	pr_acrnlog("Hide sriov cap for %02x:%02x.%x", vdev->pdev->bdf.bits.b, vdev->pdev->bdf.bits.d, vdev->pdev->bdf.bits.f);
 }
 
+/* TODO:
+ * The OpRegion is not 4KB aligned, and on some platforms
+ * it can take up to 16KB; in that case the OpRegion spans 5 pages,
+ * so GPU_OPREGION_SIZE is set to 0x5000U (20KB) here.
+ *
+ * Passing through the OpRegion directly has a potential security issue.
+ * A copy + emulation solution to expose the host OpRegion to the guest will be adopted later.
+ */
+void passthru_gpu_opregion(struct pci_vdev *vdev)
+{
+	uint32_t gpu_opregion_hpa, gpu_opregion_gpa, gpu_asls_phys;
+
+	gpu_opregion_gpa = GPU_OPREGION_GPA;
+	gpu_asls_phys = pci_pdev_read_cfg(vdev->pdev->bdf, PCIR_ASLS_CTL, 4U);
+	gpu_opregion_hpa = gpu_asls_phys & PCIM_ASLS_OPREGION_MASK;
+	ept_add_mr(vpci2vm(vdev->vpci), vpci2vm(vdev->vpci)->arch_vm.nworld_eptp,
+			gpu_opregion_hpa, gpu_opregion_gpa,
+			GPU_OPREGION_SIZE, EPT_RD | EPT_UNCACHED);
+	pci_vdev_write_vcfg(vdev, PCIR_ASLS_CTL, 4U, gpu_opregion_gpa | (gpu_asls_phys & ~PCIM_ASLS_OPREGION_MASK));
+}
+
 /*
  * @brief Initialize a specified passthrough vdev structure.
  *
@@ -523,6 +546,10 @@ void init_vdev_pt(struct pci_vdev *vdev, bool is_pf_vdev)
 			/* Disable INTX */
 			pci_command |= 0x400U;
 			pci_pdev_write_cfg(vdev->pdev->bdf, PCIR_COMMAND, 2U, pci_command);
+
+			if (vdev->pdev->bdf.value == CONFIG_GPU_SBDF) {
+				passthru_gpu_opregion(vdev);
+			}
 		}
 	} else {
 		if (vdev->phyfun->vpci != vdev->vpci) {

@@ -516,8 +516,10 @@ static int32_t write_pt_dev_cfg(struct pci_vdev *vdev, uint32_t offset,
 	} else {
 		if (offset != vdev->pdev->sriov.pre_pos) {
 			if (!is_quirk_ptdev(vdev)) {
+				if ((vdev->pdev->bdf.value != CONFIG_GPU_SBDF) || (offset != PCIR_ASLS_CTL)) {
 				/* passthru to physical device */
 				pci_pdev_write_cfg(vdev->pdev->bdf, offset, bytes, val);
+				}
 			} else {
 				ret = -ENODEV;
 			}
@@ -544,6 +546,9 @@ static int32_t read_pt_dev_cfg(const struct pci_vdev *vdev, uint32_t offset,
 	} else if (!is_quirk_ptdev(vdev)) {
 		/* passthru to physical device */
 		*val = pci_pdev_read_cfg(vdev->pdev->bdf, offset, bytes);
+		if ((vdev->pdev->bdf.value == CONFIG_GPU_SBDF) && (offset == PCIR_ASLS_CTL)) {
+			*val = pci_vdev_read_vcfg(vdev, offset, bytes);
+		}
 	} else {
 		ret = -ENODEV;
 	}

@@ -19,6 +19,11 @@ enum intx_ctlr {
 #define PTDEV_INTR_MSI (1U << 0U)
 #define PTDEV_INTR_INTX (1U << 1U)
 
+#define GPU_OPREGION_SIZE 0x5000U
+#define GPU_OPREGION_GPA 0x40880000U
+#define PCIR_ASLS_CTL 0xfcU /* offset of the OpRegion base address register in PCIe configuration space */
+#define PCIM_ASLS_OPREGION_MASK 0xfffff000U /* the OpRegion must be 4KB aligned */
+
 #define INVALID_PTDEV_ENTRY_ID 0xffffU
 
 #define DEFINE_MSI_SID(name, a, b) \
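
A worked check of GPU_OPREGION_SIZE (0x5000U), expanding on the TODO comment in the
passthrough code above (the base address is an illustrative value, not from the patch):

    base     = 0x...0800            (not 4KB aligned)
    length   = 0x4000               (16KB worst case)
    end      = base + length = 0x...4800
    pages    = 0x0000, 0x1000, 0x2000, 0x3000, 0x4000  ->  5 pages touched
    map size = 5 * 0x1000 = 0x5000  (20KB)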