From efc8b329c7fdc30921a7dfc109237523e1e5b1cc Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 21 Dec 2022 16:20:25 -0800 Subject: [PATCH] clocksource: Verify HPET and PMTMR when TSC unverified On systems with two or fewer sockets, when the boot CPU has CONSTANT_TSC, NONSTOP_TSC, and TSC_ADJUST, clocksource watchdog verification of the TSC is disabled. This works well much of the time, but there is the occasional production-level system that meets all of these criteria, but which still has a TSC that skews significantly from atomic-clock time. This is usually attributed to a firmware or hardware fault. Yes, the various NTP daemons do express their opinions of userspace-to-atomic-clock time skew, but they put them in various places, depending on the daemon and distro in question. It would therefore be good for the kernel to have some clue that there is a problem. The old behavior of marking the TSC unstable is a non-starter because a great many workloads simply cannot tolerate the overheads and latencies of the various non-TSC clocksources. In addition, NTP-corrected systems sometimes can tolerate significant kernel-space time skew as long as the userspace time sources are within epsilon of atomic-clock time. Therefore, when watchdog verification of TSC is disabled, enable it for HPET and PMTMR (AKA ACPI PM timer). This provides the needed in-kernel time-skew diagnostic without degrading the system's performance. Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Daniel Lezcano Cc: Waiman Long Cc: Tested-by: Feng Tang --- arch/x86/include/asm/time.h | 1 + arch/x86/kernel/hpet.c | 2 ++ arch/x86/kernel/tsc.c | 5 +++++ drivers/clocksource/acpi_pm.c | 6 ++++-- 4 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/time.h b/arch/x86/include/asm/time.h index 8ac563abb567..a53961c64a56 100644 --- a/arch/x86/include/asm/time.h +++ b/arch/x86/include/asm/time.h @@ -8,6 +8,7 @@ extern void hpet_time_init(void); extern void time_init(void); extern bool pit_timer_init(void); +extern bool tsc_clocksource_watchdog_disabled(void); extern struct clock_event_device *global_clock_event; diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 71f336425e58..c8eb1ac5125a 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1091,6 +1091,8 @@ int __init hpet_enable(void) if (!hpet_counting()) goto out_nohpet; + if (tsc_clocksource_watchdog_disabled()) + clocksource_hpet.flags |= CLOCK_SOURCE_MUST_VERIFY; clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq); if (id & HPET_ID_LEGSUP) { diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 92bbc4a6b3fc..a5371c6d4b64 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1190,6 +1190,11 @@ static void __init tsc_disable_clocksource_watchdog(void) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; } +bool tsc_clocksource_watchdog_disabled(void) +{ + return !(clocksource_tsc.flags & CLOCK_SOURCE_MUST_VERIFY); +} + static void __init check_system_tsc_reliable(void) { #if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) diff --git a/drivers/clocksource/acpi_pm.c b/drivers/clocksource/acpi_pm.c index 279ddff81ab4..82338773602c 100644 --- a/drivers/clocksource/acpi_pm.c +++ b/drivers/clocksource/acpi_pm.c @@ -23,6 +23,7 @@ #include #include #include +#include /* * The I/O port the PMTMR resides at. @@ -210,8 +211,9 @@ static int __init init_acpi_pm_clocksource(void) return -ENODEV; } - return clocksource_register_hz(&clocksource_acpi_pm, - PMTMR_TICKS_PER_SEC); + if (tsc_clocksource_watchdog_disabled()) + clocksource_acpi_pm.flags |= CLOCK_SOURCE_MUST_VERIFY; + return clocksource_register_hz(&clocksource_acpi_pm, PMTMR_TICKS_PER_SEC); } /* We use fs_initcall because we want the PCI fixups to have run