acrn-kernel/arch/loongarch/kernel/process.c

354 lines
8.6 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0
/*
* Author: Huacai Chen <chenhuacai@loongson.cn>
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*
* Derived from MIPS:
* Copyright (C) 1994 - 1999, 2000 by Ralf Baechle and others.
* Copyright (C) 2005, 2006 by Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2004 Thiemo Seufer
* Copyright (C) 2013 Imagination Technologies Ltd.
*/
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/export.h>
#include <linux/ptrace.h>
#include <linux/mman.h>
#include <linux/personality.h>
#include <linux/sys.h>
#include <linux/completion.h>
#include <linux/kallsyms.h>
#include <linux/random.h>
#include <linux/prctl.h>
#include <linux/nmi.h>
#include <asm/asm.h>
#include <asm/bootinfo.h>
#include <asm/cpu.h>
#include <asm/elf.h>
#include <asm/fpu.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/irq_regs.h>
#include <asm/loongarch.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/reg.h>
#include <asm/unwind.h>
#include <asm/vdso.h>
/*
* Idle related variables and functions
*/
unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE;
EXPORT_SYMBOL(boot_option_idle_override);
#ifdef CONFIG_HOTPLUG_CPU
void arch_cpu_idle_dead(void)
{
play_dead();
}
#endif
asmlinkage void ret_from_fork(void);
asmlinkage void ret_from_kernel_thread(void);
void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp)
{
unsigned long crmd;
unsigned long prmd;
unsigned long euen;
/* New thread loses kernel privileges. */
crmd = regs->csr_crmd & ~(PLV_MASK);
crmd |= PLV_USER;
regs->csr_crmd = crmd;
prmd = regs->csr_prmd & ~(PLV_MASK);
prmd |= PLV_USER;
regs->csr_prmd = prmd;
euen = regs->csr_euen & ~(CSR_EUEN_FPEN);
regs->csr_euen = euen;
lose_fpu(0);
LoongArch: Fix and simplify fcsr initialization on execve() commit c2396651309eba291c15e32db8fbe44c738b5921 upstream. There has been a lingering bug in LoongArch Linux systems causing some GCC tests to intermittently fail (see Closes link). I've made a minimal reproducer: zsh% cat measure.s .align 4 .globl _start _start: movfcsr2gr $a0, $fcsr0 bstrpick.w $a0, $a0, 16, 16 beqz $a0, .ok break 0 .ok: li.w $a7, 93 syscall 0 zsh% cc mesaure.s -o measure -nostdlib zsh% echo $((1.0/3)) 0.33333333333333331 zsh% while ./measure; do ; done This while loop should not stop as POSIX is clear that execve must set fenv to the default, where FCSR should be zero. But in fact it will just stop after running for a while (normally less than 30 seconds). Note that "$((1.0/3))" is needed to reproduce this issue because it raises FE_INVALID and makes fcsr0 non-zero. The problem is we are currently relying on SET_PERSONALITY2() to reset current->thread.fpu.fcsr. But SET_PERSONALITY2() is executed before start_thread which calls lose_fpu(0). We can see if kernel preempt is enabled, we may switch to another thread after SET_PERSONALITY2() but before lose_fpu(0). Then bad thing happens: during the thread switch the value of the fcsr0 register is stored into current->thread.fpu.fcsr, making it dirty again. The issue can be fixed by setting current->thread.fpu.fcsr after lose_fpu(0) because lose_fpu() clears TIF_USEDFPU, then the thread switch won't touch current->thread.fpu.fcsr. The only other architecture setting FCSR in SET_PERSONALITY2() is MIPS. I've ran a similar test on MIPS with mainline kernel and it turns out MIPS is buggy, too. Anyway MIPS do this for supporting different FP flavors (NaN encodings, etc.) which do not exist on LoongArch. So for LoongArch, we can simply remove the current->thread.fpu.fcsr setting from SET_PERSONALITY2() and do it in start_thread(), after lose_fpu(0). The while loop failing with the mainline kernel has survived one hour after this change on LoongArch. Fixes: 803b0fc5c3f2baa ("LoongArch: Add process management") Closes: https://github.com/loongson-community/discussions/issues/7 Link: https://lore.kernel.org/linux-mips/7a6aa1bbdbbe2e63ae96ff163fab0349f58f1b9e.camel@xry111.site/ Cc: stable@vger.kernel.org Signed-off-by: Xi Ruoyao <xry111@xry111.site> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2024-01-17 12:43:08 +08:00
current->thread.fpu.fcsr = boot_cpu_data.fpu_csr0;
clear_thread_flag(TIF_LSX_CTX_LIVE);
clear_thread_flag(TIF_LASX_CTX_LIVE);
clear_used_math();
regs->csr_era = pc;
regs->regs[3] = sp;
}
void exit_thread(struct task_struct *tsk)
{
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
/*
* Save any process state which is live in hardware registers to the
* parent context prior to duplication. This prevents the new child
* state becoming stale if the parent is preempted before copy_thread()
* gets a chance to save the parent's live hardware registers to the
* child context.
*/
preempt_disable();
if (is_fpu_owner())
save_fp(current);
preempt_enable();
if (used_math())
memcpy(dst, src, sizeof(struct task_struct));
else
memcpy(dst, src, offsetof(struct task_struct, thread.fpu.fpr));
return 0;
}
/*
* Copy architecture-specific thread state
*/
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long childksp;
unsigned long tls = args->tls;
unsigned long usp = args->stack;
unsigned long clone_flags = args->flags;
struct pt_regs *childregs, *regs = current_pt_regs();
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
/* set up new TSS. */
childregs = (struct pt_regs *) childksp - 1;
/* Put the stack after the struct pt_regs. */
childksp = (unsigned long) childregs;
p->thread.sched_cfa = 0;
p->thread.csr_euen = 0;
p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD);
p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
if (unlikely(args->fn)) {
/* kernel thread */
p->thread.reg03 = childksp;
p->thread.reg23 = (unsigned long)args->fn;
p->thread.reg24 = (unsigned long)args->fn_arg;
p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
p->thread.sched_ra = (unsigned long)ret_from_kernel_thread;
memset(childregs, 0, sizeof(struct pt_regs));
childregs->csr_euen = p->thread.csr_euen;
childregs->csr_crmd = p->thread.csr_crmd;
childregs->csr_prmd = p->thread.csr_prmd;
childregs->csr_ecfg = p->thread.csr_ecfg;
LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be considered as a fpu owner and kernel try to save its FPU/SIMD context and cause such errors: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread(). Cc: stable@vger.kernel.org Reported-by: Qi Hu <huqi@loongson.cn> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2022-11-21 19:02:57 +08:00
goto out;
}
/* user thread */
*childregs = *regs;
childregs->regs[4] = 0; /* Child gets zero as return value */
if (usp)
childregs->regs[3] = usp;
p->thread.reg03 = (unsigned long) childregs;
p->thread.reg01 = (unsigned long) ret_from_fork;
p->thread.sched_ra = (unsigned long) ret_from_fork;
/*
* New tasks lose permission to use the fpu. This accelerates context
* switching for most programs since they don't use the fpu.
*/
childregs->csr_euen = 0;
LoongArch: Clear FPU/SIMD thread info flags for kernel thread If a kernel thread is created by a user thread, it may carry FPU/SIMD thread info flags (TIF_USEDFPU, TIF_USEDSIMD, etc.). Then it will be considered as a fpu owner and kernel try to save its FPU/SIMD context and cause such errors: [ 41.518931] do_fpu invoked from kernel context![#1]: [ 41.523933] CPU: 1 PID: 395 Comm: iou-wrk-394 Not tainted 6.1.0-rc5+ #217 [ 41.530757] Hardware name: Loongson Loongson-3A5000-7A1000-1w-CRB/Loongson-LS3A5000-7A1000-1w-CRB, BIOS vUDK2018-LoongArch-V2.0.pre-beta8 08/18/2022 [ 41.544064] $ 0 : 0000000000000000 90000000011e9468 9000000106c7c000 9000000106c7fcf0 [ 41.552101] $ 4 : 9000000106305d40 9000000106689800 9000000106c7fd08 0000000003995818 [ 41.560138] $ 8 : 0000000000000001 90000000009a72e4 0000000000000020 fffffffffffffffc [ 41.568174] $12 : 0000000000000000 0000000000000000 0000000000000020 00000009aab7e130 [ 41.576211] $16 : 00000000000001ff 0000000000000407 0000000000000001 0000000000000000 [ 41.584247] $20 : 0000000000000000 0000000000000001 9000000106c7fd70 90000001002f0400 [ 41.592284] $24 : 0000000000000000 900000000178f740 90000000011e9834 90000001063057c0 [ 41.600320] $28 : 0000000000000000 0000000000000001 9000000006826b40 9000000106305140 [ 41.608356] era : 9000000000228848 _save_fp+0x0/0xd8 [ 41.613542] ra : 90000000011e9468 __schedule+0x568/0x8d0 [ 41.619160] CSR crmd: 000000b0 [ 41.619163] CSR prmd: 00000000 [ 41.622359] CSR euen: 00000000 [ 41.625558] CSR ecfg: 00071c1c [ 41.628756] CSR estat: 000f0000 [ 41.635239] ExcCode : f (SubCode 0) [ 41.638783] PrId : 0014c010 (Loongson-64bit) [ 41.643191] Modules linked in: acpi_ipmi vfat fat ipmi_si ipmi_devintf cfg80211 ipmi_msghandler rfkill fuse efivarfs [ 41.653734] Process iou-wrk-394 (pid: 395, threadinfo=0000000004ebe913, task=00000000636fa1be) [ 41.662375] Stack : 00000000ffff0875 9000000006800ec0 9000000006800ec0 90000000002d57e0 [ 41.670412] 0000000000000001 0000000000000000 9000000106535880 0000000000000001 [ 41.678450] 9000000105291800 0000000000000000 9000000105291838 900000000178e000 [ 41.686487] 9000000106c7fd90 9000000106305140 0000000000000001 90000000011e9834 [ 41.694523] 00000000ffff0875 90000000011f034c 9000000105291838 9000000105291830 [ 41.702561] 0000000000000000 9000000006801440 00000000ffff0875 90000000002d48c0 [ 41.710597] 9000000128800001 9000000106305140 9000000105291838 9000000105291838 [ 41.718634] 9000000105291830 9000000107811740 9000000105291848 90000000009bf1e0 [ 41.726672] 9000000105291830 9000000107811748 2d6b72772d756f69 0000000000343933 [ 41.734708] 0000000000000000 0000000000000000 0000000000000000 0000000000000000 [ 41.742745] ... [ 41.745252] Call Trace: [ 42.197868] [<9000000000228848>] _save_fp+0x0/0xd8 [ 42.205214] [<90000000011ed468>] __schedule+0x568/0x8d0 [ 42.210485] [<90000000011ed834>] schedule+0x64/0xd4 [ 42.215411] [<90000000011f434c>] schedule_timeout+0x88/0x188 [ 42.221115] [<90000000009c36d0>] io_wqe_worker+0x184/0x350 [ 42.226645] [<9000000000221cf0>] ret_from_kernel_thread+0xc/0x9c This can be easily triggered by ltp testcase syscalls/io_uring02 and it can also be easily fixed by clearing the FPU/SIMD thread info flags for kernel threads in copy_thread(). Cc: stable@vger.kernel.org Reported-by: Qi Hu <huqi@loongson.cn> Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
2022-11-21 19:02:57 +08:00
if (clone_flags & CLONE_SETTLS)
childregs->regs[2] = tls;
out:
clear_tsk_thread_flag(p, TIF_USEDFPU);
clear_tsk_thread_flag(p, TIF_USEDSIMD);
clear_tsk_thread_flag(p, TIF_LSX_CTX_LIVE);
clear_tsk_thread_flag(p, TIF_LASX_CTX_LIVE);
return 0;
}
unsigned long __get_wchan(struct task_struct *task)
{
unsigned long pc = 0;
struct unwind_state state;
if (!try_get_task_stack(task))
return 0;
for (unwind_start(&state, task, NULL);
!unwind_done(&state); unwind_next_frame(&state)) {
pc = unwind_get_return_address(&state);
if (!pc)
break;
if (in_sched_functions(pc))
continue;
break;
}
put_task_stack(task);
return pc;
}
bool in_irq_stack(unsigned long stack, struct stack_info *info)
{
unsigned long nextsp;
unsigned long begin = (unsigned long)this_cpu_read(irq_stack);
unsigned long end = begin + IRQ_STACK_START;
if (stack < begin || stack >= end)
return false;
nextsp = *(unsigned long *)end;
if (nextsp & (SZREG - 1))
return false;
info->begin = begin;
info->end = end;
info->next_sp = nextsp;
info->type = STACK_TYPE_IRQ;
return true;
}
bool in_task_stack(unsigned long stack, struct task_struct *task,
struct stack_info *info)
{
unsigned long begin = (unsigned long)task_stack_page(task);
unsigned long end = begin + THREAD_SIZE;
if (stack < begin || stack >= end)
return false;
info->begin = begin;
info->end = end;
info->next_sp = 0;
info->type = STACK_TYPE_TASK;
return true;
}
int get_stack_info(unsigned long stack, struct task_struct *task,
struct stack_info *info)
{
task = task ? : current;
if (!stack || stack & (SZREG - 1))
goto unknown;
if (in_task_stack(stack, task, info))
return 0;
if (task != current)
goto unknown;
if (in_irq_stack(stack, info))
return 0;
unknown:
info->type = STACK_TYPE_UNKNOWN;
return -EINVAL;
}
unsigned long stack_top(void)
{
unsigned long top = TASK_SIZE & PAGE_MASK;
/* Space for the VDSO & data page */
top -= PAGE_ALIGN(current->thread.vdso->size);
top -= PAGE_SIZE;
/* Space to randomize the VDSO base */
if (current->flags & PF_RANDOMIZE)
top -= VDSO_RANDOMIZE_SIZE;
return top;
}
/*
* Don't forget that the stack pointer must be aligned on a 8 bytes
* boundary for 32-bits ABI and 16 bytes for 64-bits ABI.
*/
unsigned long arch_align_stack(unsigned long sp)
{
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
sp -= get_random_u32_below(PAGE_SIZE);
return sp & STACK_ALIGN;
}
static DEFINE_PER_CPU(call_single_data_t, backtrace_csd);
static struct cpumask backtrace_csd_busy;
static void handle_backtrace(void *info)
{
nmi_cpu_backtrace(get_irq_regs());
cpumask_clear_cpu(smp_processor_id(), &backtrace_csd_busy);
}
static void raise_backtrace(cpumask_t *mask)
{
call_single_data_t *csd;
int cpu;
for_each_cpu(cpu, mask) {
/*
* If we previously sent an IPI to the target CPU & it hasn't
* cleared its bit in the busy cpumask then it didn't handle
* our previous IPI & it's not safe for us to reuse the
* call_single_data_t.
*/
if (cpumask_test_and_set_cpu(cpu, &backtrace_csd_busy)) {
pr_warn("Unable to send backtrace IPI to CPU%u - perhaps it hung?\n",
cpu);
continue;
}
csd = &per_cpu(backtrace_csd, cpu);
csd->func = handle_backtrace;
smp_call_function_single_async(cpu, csd);
}
}
void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
{
nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace);
}
#ifdef CONFIG_64BIT
void loongarch_dump_regs64(u64 *uregs, const struct pt_regs *regs)
{
unsigned int i;
for (i = LOONGARCH_EF_R1; i <= LOONGARCH_EF_R31; i++) {
uregs[i] = regs->regs[i - LOONGARCH_EF_R0];
}
uregs[LOONGARCH_EF_ORIG_A0] = regs->orig_a0;
uregs[LOONGARCH_EF_CSR_ERA] = regs->csr_era;
uregs[LOONGARCH_EF_CSR_BADV] = regs->csr_badvaddr;
uregs[LOONGARCH_EF_CSR_CRMD] = regs->csr_crmd;
uregs[LOONGARCH_EF_CSR_PRMD] = regs->csr_prmd;
uregs[LOONGARCH_EF_CSR_EUEN] = regs->csr_euen;
uregs[LOONGARCH_EF_CSR_ECFG] = regs->csr_ecfg;
uregs[LOONGARCH_EF_CSR_ESTAT] = regs->csr_estat;
}
#endif /* CONFIG_64BIT */