diff --git a/hypervisor/arch/x86/cpu.c b/hypervisor/arch/x86/cpu.c index e57a118ba..84782d045 100644 --- a/hypervisor/arch/x86/cpu.c +++ b/hypervisor/arch/x86/cpu.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -156,6 +157,8 @@ void init_pcpu_pre(bool is_bsp) load_pcpu_state_data(); + init_frequency_policy(); + init_e820(); /* reserve ppt buffer from e820 */ @@ -315,6 +318,8 @@ void init_pcpu_post(uint16_t pcpu_id) panic("failed to initialize software SRAM!"); } + apply_frequency_policy(); + init_sched(pcpu_id); #ifdef CONFIG_RDT_ENABLED diff --git a/hypervisor/arch/x86/pm.c b/hypervisor/arch/x86/pm.c index 9af9362cc..0629f5ee3 100644 --- a/hypervisor/arch/x86/pm.c +++ b/hypervisor/arch/x86/pm.c @@ -19,6 +19,8 @@ #include #include #include +#include +#include struct cpu_context cpu_ctx; @@ -271,3 +273,84 @@ void reset_host(void) asm_pause(); } } + +static enum acrn_cpufreq_policy_type cpufreq_policy = CPUFREQ_POLICY_PERFORMANCE; + +void init_frequency_policy(void) +{ + uint32_t cpuid_06_eax, unused; + struct acrn_boot_info *abi = get_acrn_boot_info(); + const char *cmd_src = abi->cmdline; + + /* + * Parse the boot cmdline and decide which policy type to use. + * Only 'cpu_perf_policy=Nominal' is searched for; any other cmdline, including + * cpu_perf_policy=Performance, keeps the default 'Performance' policy. + */ + if(strstr_s(cmd_src, MAX_BOOTARGS_SIZE, "cpu_perf_policy=Nominal", 24U) != NULL) { + cpufreq_policy = CPUFREQ_POLICY_NOMINAL; + } + + cpuid_subleaf(0x6U, 0U, &cpuid_06_eax, &unused, &unused, &unused); + if ((cpuid_06_eax & CPUID_EAX_HWP) != 0U) { + /* If CPUID.06H:EAX reports HWP, enable HWP early. This will unlock other HWP MSRs. */ + msr_write(MSR_IA32_PM_ENABLE, 1U); + } +} + +/* + * This function is to be called by each pcpu after init_frequency_policy(). + * It applies the frequency policy, which can be specified from boot parameters. + * - cpu_perf_policy=Performance: HWP autonomous selection, between highest HWP level and + * lowest HWP level.
If HWP is not available, the frequency is fixed to highest p-state. + * - cpu_perf_policy=Nominal: frequency is fixed to guaranteed HWP level or nominal p-state. + * The default policy is 'Performance'. + * + * ACRN will not be governing pcpu's frequency after this. + */ +void apply_frequency_policy(void) +{ + struct acrn_cpufreq_limits *limits = &cpufreq_limits[get_pcpu_id()]; + uint64_t highest_lvl_req = limits->highest_hwp_lvl, lowest_lvl_req = limits->lowest_hwp_lvl, reg; + uint8_t pstate_req = limits->performance_pstate; + uint32_t cpuid_06_eax, cpuid_01_ecx, unused; + + cpuid_subleaf(0x6U, 0U, &cpuid_06_eax, &unused, &unused, &unused); + cpuid_subleaf(0x1U, 0U, &unused, &unused, &cpuid_01_ecx, &unused); + /* Both HWP and ACPI p-state are supported. HWP is the first choice. */ + if ((cpuid_06_eax & CPUID_EAX_HWP) != 0U) { + /* + * For Performance policy(default): CPU frequency will be autonomously selected between highest and lowest HWP levels + * For Nominal policy: set to fixed frequency by letting highest=lowest=guaranteed + */ + if (cpufreq_policy == CPUFREQ_POLICY_NOMINAL) { + highest_lvl_req = limits->guaranteed_hwp_lvl; + lowest_lvl_req = limits->guaranteed_hwp_lvl; + } + /* EPP(0x80: default) [31:24] | Desired_Performance(0: HWP auto) [23:16] | Maximum_Performance [15:8] | Minimum_Performance [7:0] */ + reg = (0x80UL << 24U) | (0x00UL << 16U) | (highest_lvl_req << 8U) | lowest_lvl_req; + msr_write(MSR_IA32_HWP_REQUEST, reg); + } else if ((cpuid_01_ecx & CPUID_ECX_EST) != 0U) { + struct cpu_state_info *pm_s_state_data = get_cpu_pm_state_info(); + + /* + * Set to fixed frequency in ACPI p-state mode. + * Performance policy: performance_pstate + * Nominal policy: nominal_pstate + */ + if (cpufreq_policy == CPUFREQ_POLICY_NOMINAL) { + pstate_req = limits->nominal_pstate; + } + + /* PX info might be missing on some platforms (px_cnt equals 0). Do nothing if so.
*/ + if (pm_s_state_data->px_cnt != 0) { + if (pstate_req < pm_s_state_data->px_cnt) { + msr_write(MSR_IA32_PERF_CTL, pm_s_state_data->px_data[pstate_req].control); + } else { + ASSERT(false, "invalid p-state index"); + } + } + } else { + /* If no frequency interface is present, just let CPU run by itself. Do nothing here. */ + } +} diff --git a/hypervisor/include/arch/x86/asm/board.h b/hypervisor/include/arch/x86/asm/board.h index 56bbeb9c8..25b2dcc32 100644 --- a/hypervisor/include/arch/x86/asm/board.h +++ b/hypervisor/include/arch/x86/asm/board.h @@ -34,6 +34,7 @@ extern struct rdt_type res_cap_info[RDT_NUM_RESOURCES]; #endif extern const struct cpu_state_table board_cpu_state_tbl; +extern struct acrn_cpufreq_limits cpufreq_limits[MAX_PCPU_NUM]; extern const union pci_bdf plat_hidden_pdevs[MAX_HIDDEN_PDEVS_NUM]; extern const struct vmsix_on_msi_info vmsix_on_msi_devs[MAX_VMSIX_ON_MSI_PDEVS_NUM]; diff --git a/hypervisor/include/arch/x86/asm/host_pm.h b/hypervisor/include/arch/x86/asm/host_pm.h index b8fb8a307..6ee25e582 100644 --- a/hypervisor/include/arch/x86/asm/host_pm.h +++ b/hypervisor/include/arch/x86/asm/host_pm.h @@ -39,5 +39,7 @@ extern void restore_s3_context(void); struct cpu_state_info *get_cpu_pm_state_info(void); struct acpi_reset_reg *get_host_reset_reg_data(void); void reset_host(void); +void init_frequency_policy(void); +void apply_frequency_policy(void); #endif /* HOST_PM_H */ diff --git a/hypervisor/include/public/acrn_common.h b/hypervisor/include/public/acrn_common.h index c332632e4..736e3c891 100644 --- a/hypervisor/include/public/acrn_common.h +++ b/hypervisor/include/public/acrn_common.h @@ -530,6 +530,21 @@ struct acrn_pstate_data { uint64_t status; /* success indicator */ }; +enum acrn_cpufreq_policy_type { + CPUFREQ_POLICY_PERFORMANCE, + CPUFREQ_POLICY_NOMINAL, +}; + +struct acrn_cpufreq_limits { + /* Performance levels for HWP */ + uint8_t guaranteed_hwp_lvl; + uint8_t highest_hwp_lvl; + uint8_t lowest_hwp_lvl; + /* Index for the
p-state table _PSS */ + uint8_t nominal_pstate; + uint8_t performance_pstate; +}; + struct acpi_sx_pkg { uint8_t val_pm1a; uint8_t val_pm1b;