Fix a performance regression on large SMT systems, an Intel SMT4
balancing bug, and a topology setup bug on (Intel) hybrid processors.

Signed-off-by: Ingo Molnar <mingo@kernel.org>

Merge tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull scheduler fixes from Ingo Molnar:
 "Fix a performance regression on large SMT systems, an Intel SMT4
  balancing bug, and a topology setup bug on (Intel) hybrid processors"

* tag 'sched-urgent-2023-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sched: Restore the SD_ASYM_PACKING flag in the DIE domain
  sched/fair: Fix SMT4 group_smt_balance handling
  sched/fair: Optimize should_we_balance() for large SMT systems
commit e5a710d132
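Background for the should_we_balance() change in the kernel/sched/fair.c hunks below: the function used to run idle_cpu() checks over every CPU in the group balance mask, which on wide-SMT machines keeps re-examining siblings of cores that have already been ruled out. The fix copies the balance mask into a per-CPU scratch cpumask and, once one idle thread of a not-fully-idle core has been recorded as a fallback, clears that core's remaining siblings from the scratch mask. The standalone C sketch below shows only that mask-pruning idea; the SMT layout, the idle pattern, and every name (cpu_is_idle, smt_mask, pick_balance_cpu, ...) are invented for illustration and this is not kernel code.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS    16
#define SMT_WIDTH   4   /* assumed: 4 hardware threads per core */

/* Hypothetical idle map: thread 0 of every core is busy, the rest are idle,
 * except core 3 (CPUs 12-15), which is fully idle. */
static bool cpu_is_idle(int cpu)
{
        if (cpu >= 12)
                return true;
        return (cpu % SMT_WIDTH) != 0;
}

/* Mask of all SMT siblings of @cpu, assuming contiguous blocks of SMT_WIDTH. */
static uint64_t smt_mask(int cpu)
{
        return ((1ULL << SMT_WIDTH) - 1) << ((cpu / SMT_WIDTH) * SMT_WIDTH);
}

static bool core_is_idle(int cpu)
{
        uint64_t m = smt_mask(cpu);

        for (int i = 0; i < NR_CPUS; i++)
                if ((m & (1ULL << i)) && !cpu_is_idle(i))
                        return false;
        return true;
}

/*
 * Prefer a CPU on a fully idle core; fall back to the first idle thread of a
 * busy core. Once such a fallback thread is found, its remaining siblings are
 * dropped from the scan set -- the pruning the fair.c hunks add with
 * cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu)).
 */
static int pick_balance_cpu(uint64_t balance_mask)
{
        uint64_t scan = balance_mask;   /* scratch copy, like should_we_balance_tmpmask */
        int idle_smt = -1;

        for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                if (!(scan & (1ULL << cpu)) || !cpu_is_idle(cpu))
                        continue;
                if (!core_is_idle(cpu)) {
                        if (idle_smt == -1)
                                idle_smt = cpu;
                        scan &= ~smt_mask(cpu); /* skip this core's other threads */
                        continue;
                }
                return cpu;                     /* idle thread on a fully idle core */
        }
        return idle_smt;
}

int main(void)
{
        printf("balance cpu: %d\n", pick_balance_cpu((1ULL << NR_CPUS) - 1));
        return 0;
}

Once one idle thread of a busy core has been remembered as the fallback, that core's other threads no longer need to be examined, which is where the savings on large SMT systems come from.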
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -579,7 +579,6 @@ static bool match_llc(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o)
 }
 
 
-#if defined(CONFIG_SCHED_SMT) || defined(CONFIG_SCHED_CLUSTER) || defined(CONFIG_SCHED_MC)
 static inline int x86_sched_itmt_flags(void)
 {
         return sysctl_sched_itmt_enabled ? SD_ASYM_PACKING : 0;
@@ -603,7 +602,14 @@ static int x86_cluster_flags(void)
         return cpu_cluster_flags() | x86_sched_itmt_flags();
 }
 #endif
-#endif
+
+static int x86_die_flags(void)
+{
+        if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU))
+                return x86_sched_itmt_flags();
+
+        return 0;
+}
 
 /*
  * Set if a package/die has multiple NUMA nodes inside.
@@ -640,7 +646,7 @@ static void __init build_sched_topology(void)
          */
         if (!x86_has_numa_in_package) {
                 x86_topology[i++] = (struct sched_domain_topology_level){
-                        cpu_cpu_mask, SD_INIT_NAME(DIE)
+                        cpu_cpu_mask, x86_die_flags, SD_INIT_NAME(DIE)
                 };
         }
 
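The smpboot.c hunks above plug a new flags callback, x86_die_flags(), into the DIE entry of the x86 topology table, so SD_ASYM_PACKING is applied to that domain only on hybrid parts (X86_FEATURE_HYBRID_CPU) and only while the ITMT sysctl is enabled. The sketch below mimics that per-level flags-callback pattern with a simplified, made-up table type; it is not the kernel's struct sched_domain_topology_level, and all names and values are illustrative only.

#include <stdbool.h>
#include <stdio.h>

#define MY_SD_ASYM_PACKING  0x1   /* stand-in for the real SD_ASYM_PACKING bit */

/* Assumed runtime knobs, standing in for sysctl_sched_itmt_enabled and
 * cpu_feature_enabled(X86_FEATURE_HYBRID_CPU). */
static bool itmt_enabled = true;
static bool hybrid_cpu   = true;

static int my_itmt_flags(void)
{
        return itmt_enabled ? MY_SD_ASYM_PACKING : 0;
}

/* Like x86_die_flags(): asymmetric packing only matters on hybrid parts. */
static int my_die_flags(void)
{
        return hybrid_cpu ? my_itmt_flags() : 0;
}

/* Simplified topology level: each entry supplies an optional flags callback
 * that is consulted when the corresponding sched domain is built. */
struct my_topology_level {
        const char *name;
        int (*sd_flags)(void);
};

static const struct my_topology_level my_topology[] = {
        { "SMT", my_itmt_flags },
        { "MC",  my_itmt_flags },
        { "DIE", my_die_flags },        /* the callback the patch wires up */
};

int main(void)
{
        for (unsigned int i = 0; i < sizeof(my_topology) / sizeof(my_topology[0]); i++) {
                int flags = my_topology[i].sd_flags ? my_topology[i].sd_flags() : 0;

                printf("%-3s flags: %#x\n", my_topology[i].name, (unsigned int)flags);
        }
        return 0;
}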
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6619,6 +6619,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 /* Working cpumask for: load_balance, load_balance_newidle. */
 static DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
 static DEFINE_PER_CPU(cpumask_var_t, select_rq_mask);
+static DEFINE_PER_CPU(cpumask_var_t, should_we_balance_tmpmask);
 
 #ifdef CONFIG_NO_HZ_COMMON
 
@@ -9579,7 +9580,7 @@ static inline long sibling_imbalance(struct lb_env *env,
         imbalance /= ncores_local + ncores_busiest;
 
         /* Take advantage of resource in an empty sched group */
-        if (imbalance == 0 && local->sum_nr_running == 0 &&
+        if (imbalance <= 1 && local->sum_nr_running == 0 &&
             busiest->sum_nr_running > 1)
                 imbalance = 2;
 
@@ -9767,6 +9768,15 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                 break;
 
         case group_smt_balance:
+                /*
+                 * Check if we have spare CPUs on either SMT group to
+                 * choose has spare or fully busy handling.
+                 */
+                if (sgs->idle_cpus != 0 || busiest->idle_cpus != 0)
+                        goto has_spare;
+
+                fallthrough;
+
         case group_fully_busy:
                 /*
                  * Select the fully busy group with highest avg_load. In
@@ -9806,6 +9816,7 @@ static bool update_sd_pick_busiest(struct lb_env *env,
                         else
                                 return true;
                 }
+has_spare:
 
                 /*
                  * Select not overloaded group with lowest number of idle cpus
@@ -10917,6 +10928,7 @@ static int active_load_balance_cpu_stop(void *data);
 
 static int should_we_balance(struct lb_env *env)
 {
+        struct cpumask *swb_cpus = this_cpu_cpumask_var_ptr(should_we_balance_tmpmask);
         struct sched_group *sg = env->sd->groups;
         int cpu, idle_smt = -1;
 
@@ -10940,8 +10952,9 @@ static int should_we_balance(struct lb_env *env)
                 return 1;
         }
 
+        cpumask_copy(swb_cpus, group_balance_mask(sg));
         /* Try to find first idle CPU */
-        for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
+        for_each_cpu_and(cpu, swb_cpus, env->cpus) {
                 if (!idle_cpu(cpu))
                         continue;
 
@@ -10953,6 +10966,14 @@ static int should_we_balance(struct lb_env *env)
                 if (!(env->sd->flags & SD_SHARE_CPUCAPACITY) && !is_core_idle(cpu)) {
                         if (idle_smt == -1)
                                 idle_smt = cpu;
+                        /*
+                         * If the core is not idle, and first SMT sibling which is
+                         * idle has been found, then its not needed to check other
+                         * SMT siblings for idleness:
+                         */
+#ifdef CONFIG_SCHED_SMT
+                        cpumask_andnot(swb_cpus, swb_cpus, cpu_smt_mask(cpu));
+#endif
                         continue;
                 }
 
@@ -12918,6 +12939,8 @@ __init void init_sched_fair_class(void)
         for_each_possible_cpu(i) {
                 zalloc_cpumask_var_node(&per_cpu(load_balance_mask, i), GFP_KERNEL, cpu_to_node(i));
                 zalloc_cpumask_var_node(&per_cpu(select_rq_mask, i), GFP_KERNEL, cpu_to_node(i));
+                zalloc_cpumask_var_node(&per_cpu(should_we_balance_tmpmask, i),
+                                        GFP_KERNEL, cpu_to_node(i));
 
 #ifdef CONFIG_CFS_BANDWIDTH
                 INIT_CSD(&cpu_rq(i)->cfsb_csd, __cfsb_csd_unthrottle, cpu_rq(i));
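For the SMT4 fix in the update_sd_pick_busiest() hunk above: with four siblings per core, two group_smt_balance candidates can both still have idle CPUs, and ranking them purely by avg_load (the fully-busy rule) can pick the wrong group. The added check jumps to the has_spare comparison, which prefers the group with fewer idle CPUs, whenever either group reports idle CPUs. Below is a rough standalone rendering of that two-tier decision; the struct and helper are simplified stand-ins, not the kernel's sg_lb_stats or update_sd_pick_busiest().

#include <stdbool.h>
#include <stdio.h>

/* Simplified per-group statistics, standing in for struct sg_lb_stats. */
struct group_stats {
        unsigned int idle_cpus;
        unsigned long avg_load;
};

/*
 * Return true if @candidate should replace @busiest as the group to pull from,
 * mirroring the shape of the group_smt_balance/group_fully_busy handling:
 *  - if either group still has idle CPUs, treat them as "has spare" and prefer
 *    the group with fewer idle CPUs (it is the more loaded one);
 *  - otherwise both really are fully busy, so fall back to comparing avg_load.
 */
static bool pick_busier(const struct group_stats *candidate,
                        const struct group_stats *busiest)
{
        if (candidate->idle_cpus != 0 || busiest->idle_cpus != 0)
                return candidate->idle_cpus < busiest->idle_cpus;

        return candidate->avg_load > busiest->avg_load;
}

int main(void)
{
        /* Two SMT4 cores: one with 2 idle siblings, one with 1 idle sibling. */
        struct group_stats a = { .idle_cpus = 2, .avg_load = 300 };
        struct group_stats b = { .idle_cpus = 1, .avg_load = 250 };

        printf("prefer b over a as busiest: %s\n", pick_busier(&b, &a) ? "yes" : "no");
        return 0;
}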