amd-drm-fixes-6.5-2023-07-12:

amdgpu:
 - SMU i2c locking fix
 - Fix a possible deadlock in process restoration for ROCm apps
 - Disable PCIe lane/speed switching on Intel platforms (the platforms don't support it)
 -----BEGIN PGP SIGNATURE-----
 
 iHUEABYKAB0WIQQgO5Idg2tXNTSZAr293/aFa7yZ2AUCZK7yYQAKCRC93/aFa7yZ
 2JvYAQDpMj8/rLUsmWRk30jvkaZivgaeUEAG0FGaMpKaaATbvwEA2eHxN3xk5GKs
 ethEPp/zdivIrz6h/JWSCFrpCzqg4g8=
 =rsRJ
 -----END PGP SIGNATURE-----

Merge tag 'amd-drm-fixes-6.5-2023-07-12' of https://gitlab.freedesktop.org/agd5f/linux into drm-fixes

amd-drm-fixes-6.5-2023-07-12:

amdgpu:
- SMU i2c locking fix
- Fix a possible deadlock in process restoration for ROCm apps
- Disable PCIe lane/speed switching on Intel platforms (the platforms don't support it)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230712184009.7740-1-alexander.deucher@amd.com
This commit is contained in:
Dave Airlie 2023-07-14 11:44:54 +10:00
commit 38d88d5e97
12 changed files with 101 additions and 141 deletions

View File

@ -1296,6 +1296,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);
bool amdgpu_device_pcie_dynamic_switching_supported(void);
bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev);
bool amdgpu_device_aspm_support_quirk(void);

View File

@ -2881,6 +2881,9 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
if (!attachment->is_mapped)
continue;
if (attachment->bo_va->base.bo->tbo.pin_count)
continue;
kfd_mem_dmaunmap_attachment(mem, attachment);
ret = update_gpuvm_pte(mem, attachment, &sync_obj);
if (ret) {

View File

@ -1458,6 +1458,25 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev)
return true;
}
/*
* Intel hosts such as Raptor Lake and Sapphire Rapids don't support dynamic
* speed switching. Until we have confirmation from Intel that a specific host
* supports it, it's safer that we keep it disabled for all.
*
* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
*/
bool amdgpu_device_pcie_dynamic_switching_supported(void)
{
#if IS_ENABLED(CONFIG_X86)
struct cpuinfo_x86 *c = &cpu_data(0);
if (c->x86_vendor == X86_VENDOR_INTEL)
return false;
#endif
return true;
}
/**
* amdgpu_device_should_use_aspm - check if the device should program ASPM
*

View File

@ -295,5 +295,9 @@ int smu_v13_0_get_pptable_from_firmware(struct smu_context *smu,
uint32_t *size,
uint32_t pptable_id);
int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap);
#endif
#endif

View File

@ -2113,7 +2113,6 @@ static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -2130,6 +2129,7 @@ static int arcturus_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}

View File

@ -3021,7 +3021,6 @@ static int navi10_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -3038,6 +3037,7 @@ static int navi10_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}

View File

@ -2077,89 +2077,36 @@ static int sienna_cichlid_display_disable_memory_clock_switch(struct smu_context
return ret;
}
static void sienna_cichlid_get_override_pcie_settings(struct smu_context *smu,
uint32_t *gen_speed_override,
uint32_t *lane_width_override)
{
struct amdgpu_device *adev = smu->adev;
*gen_speed_override = 0xff;
*lane_width_override = 0xff;
switch (adev->pdev->device) {
case 0x73A0:
case 0x73A1:
case 0x73A2:
case 0x73A3:
case 0x73AB:
case 0x73AE:
/* Bit 7:0: PCIE lane width, 1 to 7 corresponds is x1 to x32 */
*lane_width_override = 6;
break;
case 0x73E0:
case 0x73E1:
case 0x73E3:
*lane_width_override = 4;
break;
case 0x7420:
case 0x7421:
case 0x7422:
case 0x7423:
case 0x7424:
*lane_width_override = 3;
break;
default:
break;
}
}
#define MAX(a, b) ((a) > (b) ? (a) : (b))
static int sienna_cichlid_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_11_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_11_0_pcie_table *pcie_table = &dpm_context->dpm_tables.pcie_table;
uint32_t gen_speed_override, lane_width_override;
uint8_t *table_member1, *table_member2;
uint32_t min_gen_speed, max_gen_speed;
uint32_t min_lane_width, max_lane_width;
uint32_t smu_pcie_arg;
u32 smu_pcie_arg;
int ret, i;
GET_PPTABLE_MEMBER(PcieGenSpeed, &table_member1);
GET_PPTABLE_MEMBER(PcieLaneCount, &table_member2);
/* PCIE gen speed and lane width override */
if (!amdgpu_device_pcie_dynamic_switching_supported()) {
if (pcie_table->pcie_gen[NUM_LINK_LEVELS - 1] < pcie_gen_cap)
pcie_gen_cap = pcie_table->pcie_gen[NUM_LINK_LEVELS - 1];
sienna_cichlid_get_override_pcie_settings(smu,
&gen_speed_override,
&lane_width_override);
if (pcie_table->pcie_lane[NUM_LINK_LEVELS - 1] < pcie_width_cap)
pcie_width_cap = pcie_table->pcie_lane[NUM_LINK_LEVELS - 1];
/* PCIE gen speed override */
if (gen_speed_override != 0xff) {
min_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
max_gen_speed = MIN(pcie_gen_cap, gen_speed_override);
/* Force all levels to use the same settings */
for (i = 0; i < NUM_LINK_LEVELS; i++) {
pcie_table->pcie_gen[i] = pcie_gen_cap;
pcie_table->pcie_lane[i] = pcie_width_cap;
}
} else {
min_gen_speed = MAX(0, table_member1[0]);
max_gen_speed = MIN(pcie_gen_cap, table_member1[1]);
min_gen_speed = min_gen_speed > max_gen_speed ?
max_gen_speed : min_gen_speed;
for (i = 0; i < NUM_LINK_LEVELS; i++) {
if (pcie_table->pcie_gen[i] > pcie_gen_cap)
pcie_table->pcie_gen[i] = pcie_gen_cap;
if (pcie_table->pcie_lane[i] > pcie_width_cap)
pcie_table->pcie_lane[i] = pcie_width_cap;
}
}
pcie_table->pcie_gen[0] = min_gen_speed;
pcie_table->pcie_gen[1] = max_gen_speed;
/* PCIE lane width override */
if (lane_width_override != 0xff) {
min_lane_width = MIN(pcie_width_cap, lane_width_override);
max_lane_width = MIN(pcie_width_cap, lane_width_override);
} else {
min_lane_width = MAX(1, table_member2[0]);
max_lane_width = MIN(pcie_width_cap, table_member2[1]);
min_lane_width = min_lane_width > max_lane_width ?
max_lane_width : min_lane_width;
}
pcie_table->pcie_lane[0] = min_lane_width;
pcie_table->pcie_lane[1] = max_lane_width;
for (i = 0; i < NUM_LINK_LEVELS; i++) {
smu_pcie_arg = (i << 16 |
@ -3842,7 +3789,6 @@ static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -3859,6 +3805,7 @@ static int sienna_cichlid_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}

View File

@ -1525,7 +1525,6 @@ static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -1542,6 +1541,7 @@ static int aldebaran_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}

View File

@ -2424,3 +2424,51 @@ int smu_v13_0_mode1_reset(struct smu_context *smu)
return ret;
}
int smu_v13_0_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_13_0_pcie_table *pcie_table =
&dpm_context->dpm_tables.pcie_table;
int num_of_levels = pcie_table->num_of_link_levels;
uint32_t smu_pcie_arg;
int ret, i;
if (!amdgpu_device_pcie_dynamic_switching_supported()) {
if (pcie_table->pcie_gen[num_of_levels - 1] < pcie_gen_cap)
pcie_gen_cap = pcie_table->pcie_gen[num_of_levels - 1];
if (pcie_table->pcie_lane[num_of_levels - 1] < pcie_width_cap)
pcie_width_cap = pcie_table->pcie_lane[num_of_levels - 1];
/* Force all levels to use the same settings */
for (i = 0; i < num_of_levels; i++) {
pcie_table->pcie_gen[i] = pcie_gen_cap;
pcie_table->pcie_lane[i] = pcie_width_cap;
}
} else {
for (i = 0; i < num_of_levels; i++) {
if (pcie_table->pcie_gen[i] > pcie_gen_cap)
pcie_table->pcie_gen[i] = pcie_gen_cap;
if (pcie_table->pcie_lane[i] > pcie_width_cap)
pcie_table->pcie_lane[i] = pcie_width_cap;
}
}
for (i = 0; i < num_of_levels; i++) {
smu_pcie_arg = i << 16;
smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
smu_pcie_arg |= pcie_table->pcie_lane[i];
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_OverridePcieParameters,
smu_pcie_arg,
NULL);
if (ret)
return ret;
}
return 0;
}

View File

@ -1645,37 +1645,6 @@ static int smu_v13_0_0_force_clk_levels(struct smu_context *smu,
return ret;
}
static int smu_v13_0_0_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_13_0_pcie_table *pcie_table =
&dpm_context->dpm_tables.pcie_table;
uint32_t smu_pcie_arg;
int ret, i;
for (i = 0; i < pcie_table->num_of_link_levels; i++) {
if (pcie_table->pcie_gen[i] > pcie_gen_cap)
pcie_table->pcie_gen[i] = pcie_gen_cap;
if (pcie_table->pcie_lane[i] > pcie_width_cap)
pcie_table->pcie_lane[i] = pcie_width_cap;
smu_pcie_arg = i << 16;
smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
smu_pcie_arg |= pcie_table->pcie_lane[i];
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_OverridePcieParameters,
smu_pcie_arg,
NULL);
if (ret)
return ret;
}
return 0;
}
static const struct smu_temperature_range smu13_thermal_policy[] = {
{-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
{ 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000, 120000},
@ -2320,7 +2289,6 @@ static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_cmn_update_table(smu, SMU_TABLE_I2C_COMMANDS, 0, req, true);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -2337,6 +2305,7 @@ static int smu_v13_0_0_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}
@ -2654,7 +2623,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
.feature_is_enabled = smu_cmn_feature_is_enabled,
.print_clk_levels = smu_v13_0_0_print_clk_levels,
.force_clk_levels = smu_v13_0_0_force_clk_levels,
.update_pcie_parameters = smu_v13_0_0_update_pcie_parameters,
.update_pcie_parameters = smu_v13_0_update_pcie_parameters,
.get_thermal_temperature_range = smu_v13_0_0_get_thermal_temperature_range,
.register_irq_handler = smu_v13_0_register_irq_handler,
.enable_thermal_alert = smu_v13_0_enable_thermal_alert,

View File

@ -1763,7 +1763,6 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
}
mutex_lock(&adev->pm.mutex);
r = smu_v13_0_6_request_i2c_xfer(smu, req);
mutex_unlock(&adev->pm.mutex);
if (r)
goto fail;
@ -1780,6 +1779,7 @@ static int smu_v13_0_6_i2c_xfer(struct i2c_adapter *i2c_adap,
}
r = num_msgs;
fail:
mutex_unlock(&adev->pm.mutex);
kfree(req);
return r;
}

View File

@ -1635,37 +1635,6 @@ static int smu_v13_0_7_force_clk_levels(struct smu_context *smu,
return ret;
}
static int smu_v13_0_7_update_pcie_parameters(struct smu_context *smu,
uint32_t pcie_gen_cap,
uint32_t pcie_width_cap)
{
struct smu_13_0_dpm_context *dpm_context = smu->smu_dpm.dpm_context;
struct smu_13_0_pcie_table *pcie_table =
&dpm_context->dpm_tables.pcie_table;
uint32_t smu_pcie_arg;
int ret, i;
for (i = 0; i < pcie_table->num_of_link_levels; i++) {
if (pcie_table->pcie_gen[i] > pcie_gen_cap)
pcie_table->pcie_gen[i] = pcie_gen_cap;
if (pcie_table->pcie_lane[i] > pcie_width_cap)
pcie_table->pcie_lane[i] = pcie_width_cap;
smu_pcie_arg = i << 16;
smu_pcie_arg |= pcie_table->pcie_gen[i] << 8;
smu_pcie_arg |= pcie_table->pcie_lane[i];
ret = smu_cmn_send_smc_msg_with_param(smu,
SMU_MSG_OverridePcieParameters,
smu_pcie_arg,
NULL);
if (ret)
return ret;
}
return 0;
}
static const struct smu_temperature_range smu13_thermal_policy[] =
{
{-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
@ -2234,7 +2203,7 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
.feature_is_enabled = smu_cmn_feature_is_enabled,
.print_clk_levels = smu_v13_0_7_print_clk_levels,
.force_clk_levels = smu_v13_0_7_force_clk_levels,
.update_pcie_parameters = smu_v13_0_7_update_pcie_parameters,
.update_pcie_parameters = smu_v13_0_update_pcie_parameters,
.get_thermal_temperature_range = smu_v13_0_7_get_thermal_temperature_range,
.register_irq_handler = smu_v13_0_register_irq_handler,
.enable_thermal_alert = smu_v13_0_enable_thermal_alert,