From ece6d76b8a474b4665ad1e4e3082735adec6984b Mon Sep 17 00:00:00 2001
From: Jyri Sarha
Date: Thu, 17 Mar 2022 17:23:41 +0200
Subject: [PATCH] ll_schedule: Update DSP load tracker to provide per task
 numbers

Calculates the average and records the maximum number of execution
cycles taken by each execution round, and prints the results once every
1024 rounds, i.e. roughly once per second with a typical 1ms tick
configuration. The output format mimics the Zephyr-side scheduler
output.

A Kconfig option under the debug menu can be used to disable this
feature. The default is currently on.

Signed-off-by: Jyri Sarha
---
 Kconfig.sof                     |  9 ++++++++
 src/include/sof/schedule/task.h |  2 ++
 src/schedule/ll_schedule.c      | 40 +++++++++++++++++++++++++++++++++
 3 files changed, 51 insertions(+)

diff --git a/Kconfig.sof b/Kconfig.sof
index 63468b48b..51bca5257 100644
--- a/Kconfig.sof
+++ b/Kconfig.sof
@@ -171,6 +171,15 @@ config DEBUG_IPC_COUNTERS
 	help
 	  Select for enabling tracing IPC counter in SRAM_REG mailbox
 
+config SCHEDULE_LOG_CYCLE_STATISTICS
+	bool "Log cycles per tick statistics for each task separately"
+	default y
+	help
+	  Log DSP cycle usage statistics about once per second (1ms *
+	  1024) for each task separately. The printed data is the task's
+	  meta information, average number of cycles/tick, and maximum
+	  number of cycles/tick during the previous 1024 tick period.
+
 config PERFORMANCE_COUNTERS
 	bool "Performance counters"
 	default n
diff --git a/src/include/sof/schedule/task.h b/src/include/sof/schedule/task.h
index a33bf0d5d..191ca28b6 100644
--- a/src/include/sof/schedule/task.h
+++ b/src/include/sof/schedule/task.h
@@ -69,6 +69,8 @@ struct task {
 	struct task_ops ops;	/**< task operations */
 #ifdef __ZEPHYR__
 	struct k_work_delayable z_delayed_work;
+#endif
+#if defined(CONFIG_SCHEDULE_LOG_CYCLE_STATISTICS) || defined(__ZEPHYR__)
 	uint32_t cycles_sum;
 	uint32_t cycles_max;
 	uint32_t cycles_cnt;
diff --git a/src/schedule/ll_schedule.c b/src/schedule/ll_schedule.c
index dad692a9b..4b6154565 100644
--- a/src/schedule/ll_schedule.c
+++ b/src/schedule/ll_schedule.c
@@ -150,6 +150,35 @@ static void schedule_ll_task_done(struct ll_schedule_data *sch,
 		atomic_read(&sch->domain->total_num_tasks));
 }
 
+/* perf measurement window size 2^x */
+#define CHECKS_WINDOW_SIZE	10
+
+#ifdef CONFIG_SCHEDULE_LOG_CYCLE_STATISTICS
+static inline void dsp_load_check(struct task *task, uint32_t cycles0, uint32_t cycles1)
+{
+	uint32_t diff;
+
+	if (cycles1 > cycles0)
+		diff = cycles1 - cycles0;
+	else
+		diff = UINT32_MAX - cycles0 + cycles1;
+
+	task->cycles_sum += diff;
+
+	if (task->cycles_max < diff)
+		task->cycles_max = diff;
+
+	if (++task->cycles_cnt == 1 << CHECKS_WINDOW_SIZE) {
+		task->cycles_sum >>= CHECKS_WINDOW_SIZE;
+		tr_info(&ll_tr, "task %p %pU avg %u, max %u", task, task->uid,
+			task->cycles_sum, task->cycles_max);
+		task->cycles_sum = 0;
+		task->cycles_max = 0;
+		task->cycles_cnt = 0;
+	}
+}
+#endif
+
 static void schedule_ll_tasks_execute(struct ll_schedule_data *sch)
 {
 	struct ll_schedule_domain *domain = sch->domain;
@@ -166,6 +195,9 @@ static void schedule_ll_tasks_execute(struct ll_schedule_data *sch)
 	 * a pipeline task terminates a DMIC task.
 	 */
 	while (wlist != &sch->tasks) {
+#ifdef CONFIG_SCHEDULE_LOG_CYCLE_STATISTICS
+		uint32_t cycles0, cycles1;
+#endif
 		task = list_item(wlist, struct task, list);
 
 		if (task->state != SOF_TASK_STATE_PENDING) {
@@ -175,6 +207,9 @@ static void schedule_ll_tasks_execute(struct ll_schedule_data *sch)
 
 		tr_dbg(&ll_tr, "task %p %pU being started...", task, task->uid);
 
+#ifdef CONFIG_SCHEDULE_LOG_CYCLE_STATISTICS
+		cycles0 = (uint32_t)k_cycle_get_64();
+#endif
 		task->state = SOF_TASK_STATE_RUNNING;
 
 		/*
@@ -199,6 +234,11 @@ static void schedule_ll_tasks_execute(struct ll_schedule_data *sch)
 		}
 
 		k_spin_unlock(&domain->lock, key);
+
+#ifdef CONFIG_SCHEDULE_LOG_CYCLE_STATISTICS
+		cycles1 = (uint32_t)k_cycle_get_64();
+		dsp_load_check(task, cycles0, cycles1);
+#endif
 	}
 }
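
Note on the statistics logic (illustrative, not part of the patch): the
windowed arithmetic in dsp_load_check() can be exercised in isolation.
The stand-alone host-side sketch below mirrors it with invented names
(load_stats, load_check; a synthetic counter stands in for
k_cycle_get_64()): accumulate per-round cycle deltas, track the worst
case, and once 2^CHECKS_WINDOW_SIZE rounds have been counted, derive the
average with a right shift and reset the window. One detail: a plain
unsigned subtraction cycles1 - cycles0 yields the exact delta even
across a 32-bit counter wrap, whereas the explicit wrap branch above
(UINT32_MAX - cycles0 + cycles1) undercounts by one cycle in that case,
which is harmless for load statistics.

/*
 * Stand-alone sketch of the dsp_load_check() window arithmetic.
 * Host-side and illustrative only: load_stats/load_check are invented
 * names, and the synthetic counter in main() stands in for
 * k_cycle_get_64().
 */
#include <stdint.h>
#include <stdio.h>

#define CHECKS_WINDOW_SIZE 10	/* window length = 2^10 = 1024 rounds */

struct load_stats {
	uint32_t cycles_sum;	/* sum of per-round cycle deltas */
	uint32_t cycles_max;	/* worst-case round in this window */
	uint32_t cycles_cnt;	/* rounds accumulated so far */
};

static void load_check(struct load_stats *s, uint32_t cycles0, uint32_t cycles1)
{
	/* unsigned subtraction is exact even across a 32-bit wrap */
	uint32_t diff = cycles1 - cycles0;

	s->cycles_sum += diff;
	if (s->cycles_max < diff)
		s->cycles_max = diff;

	/* window full: divide by 1024 with a shift, report, reset */
	if (++s->cycles_cnt == 1u << CHECKS_WINDOW_SIZE) {
		s->cycles_sum >>= CHECKS_WINDOW_SIZE;
		printf("avg %u, max %u\n", s->cycles_sum, s->cycles_max);
		s->cycles_sum = 0;
		s->cycles_max = 0;
		s->cycles_cnt = 0;
	}
}

int main(void)
{
	struct load_stats s = { 0, 0, 0 };
	uint32_t now = UINT32_MAX - 1000;	/* start near the wrap on purpose */
	int i;

	/* two full windows of synthetic rounds costing 100..131 cycles */
	for (i = 0; i < 2048; i++) {
		uint32_t start = now;

		now += 100 + (i & 31);
		load_check(&s, start, now);
	}
	return 0;
}

Compiled with any C compiler (e.g. cc loadstat.c), the sketch feeds two
full 1024-round windows of synthetic work across a counter wrap and
prints "avg 115, max 131" twice: the same avg/max pair the tr_info()
line in the patch reports per task once per window.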