debug: debug_stream_thread_info: Thread info records over debug stream

Implementation for collecting thread stack and CPU usage statistics from
the Zephyr core and sending the information as debug_stream records. The
record encoding is defined and documented in the debug_stream_thread_info.c
source file.

Signed-off-by: Jyri Sarha <jyri.sarha@linux.intel.com>
Jyri Sarha 2024-07-31 23:59:49 +03:00 committed by Kai Vehmanen
parent fdfddff696
commit 8f4672f669
4 changed files with 435 additions and 1 deletion

@@ -1,3 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
add_local_sources_ifdef(CONFIG_SOF_DEBUG_STREAM_SLOT sof debug_stream_slot.c)
add_local_sources_ifdef(CONFIG_SOF_DEBUG_STREAM_THREAD_INFO sof debug_stream_thread_info.c)

@@ -22,5 +22,26 @@ config SOF_DEBUG_STREAM_SLOT_NUMBER
used for descriptors, so for slot 3 to be mapped, the WIN_2_SIZE
must be (1 + 3) * 4k = 16k or greater.
endif
config SOF_DEBUG_STREAM_THREAD_INFO
bool "Enable Zephyr thread info reporting through Debug-Stream"
select INIT_STACKS
select THREAD_MONITOR
select THREAD_STACK_INFO
select THREAD_RUNTIME_STATS
help
This activates Zephyr thread info reporting through
Debug-Stream. Thread info reports some basic live data from
the Zephyr threads, like the stack usage high-water mark and
CPU usage. Please select THREAD_NAME=y to get proper thread
names instead of just hex numbers.
config SOF_DEBUG_STREAM_THREAD_INFO_INTERVAL
int "Thread information collection interval in seconds"
depends on SOF_DEBUG_STREAM_THREAD_INFO
default 2
range 1 10
help
Defines how often the thread info thread runs to collect
execution cycle statistics and stack usage.
endif
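
For reference, enabling the feature from an application configuration overlay could look roughly like the fragment below. This is a minimal sketch, not part of this diff; it assumes the Debug-Stream slot transport from the options above is also wanted, and the interval value is only an example within the allowed 1-10 range:

CONFIG_SOF_DEBUG_STREAM_SLOT=y
CONFIG_SOF_DEBUG_STREAM_THREAD_INFO=y
# Optional, but gives readable thread names instead of hex IDs
CONFIG_THREAD_NAME=y
# Collect statistics every 5 seconds instead of the default 2
CONFIG_SOF_DEBUG_STREAM_THREAD_INFO_INTERVAL=5

INIT_STACKS, THREAD_MONITOR, THREAD_STACK_INFO and THREAD_RUNTIME_STATS are pulled in automatically by the select statements.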

@@ -0,0 +1,378 @@
// SPDX-License-Identifier: BSD-3-Clause
//
// Copyright(c) 2024 Intel Corporation.
#include <zephyr/logging/log.h>
#include <zephyr/kernel.h>
#include <rtos/alloc.h>
#include <ipc/topology.h>
#include <stdio.h>
#include <soc.h>
#include <adsp_debug_window.h>
#include <user/debug_stream_thread_info.h>
LOG_MODULE_REGISTER(thread_info);
#define THREAD_INFO_MAX_THREADS 64
/* Must be bigger than sizeof(struct thread_info_record_hdr) */
#define THREAD_INFO_INITIAL_RECORD_BUFFER_SIZE 256
#ifdef CONFIG_THREAD_RUNTIME_STATS
/* Data structure to store the cycle counter values from the previous
* round. The numbers are used to calculate what the load was on this
* round.
*/
static struct previous_counters { /* Cached data from previous round */
uint64_t active; /* All execution cycles */
uint64_t all; /* All cycles including idle */
struct thread_counters {
void *tid; /* thread ID (the thread struct ptr) */
uint64_t cycles; /* cycle counter value */
} threads[THREAD_INFO_MAX_THREADS]; /* The max amount of threads we follow */
} __aligned(CONFIG_DCACHE_LINE_SIZE) previous[CONFIG_MP_MAX_NUM_CPUS];
#endif
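/*
 * Illustrative sketch, not part of the change: how the cached counters
 * above are meant to be used. Given two successive snapshots of the
 * busy and total cycle counters, the load over the interval is the
 * busy-cycle delta scaled to a U(0,8) fixed point value (0..255), the
 * same scaling thread_info_get() applies below. The deltas are assumed
 * to fit in 32 bits, as in the real code.
 */
static inline uint8_t example_interval_load(uint64_t busy_now, uint64_t busy_prev,
					    uint64_t all_now, uint64_t all_prev)
{
	uint32_t busy = (uint32_t)(busy_now - busy_prev);
	uint32_t all = (uint32_t)(all_now - all_prev);

	/* 255 corresponds to a fully loaded interval (100%) */
	return all ? (uint8_t)((255LLU * busy) / all) : 0;
}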
/*
* Buffer structure for building the Debug Stream record before
* sending it forward. The buffer is persistent and each record is
* built in the same buffer. It is reallocated at double the size if
* it becomes too small.
*/
struct record_buf {
size_t size;
size_t w_ptr;
uint8_t *buf;
};
/*
* Data structure to be passed down to thread_info_cb() by
* k_thread_foreach_unlocked_filter_by_cpu().
*/
struct user_data {
int core;
struct record_buf *bufd;
int thread_count;
#ifdef CONFIG_THREAD_RUNTIME_STATS
bool stats_valid;
uint32_t all_cycles;
void *active_threads[THREAD_INFO_MAX_THREADS];
struct previous_counters *previous;
#endif
};
#ifdef CONFIG_THREAD_RUNTIME_STATS
/* Finds and/or updates the cached cycle counter value for the 'tid'
* thread, calculates the execution cycles used since the previous
* round, and returns them. The tid is actually a struct k_thread
* pointer, but since it is used only as an ID and never dereferenced,
* it is passed as a void pointer.
*/
static uint32_t thread_info_get_cycles(void *tid, k_thread_runtime_stats_t *thread_stats,
struct user_data *ud, const char *name)
{
int i;
if (ud->thread_count >= ARRAY_SIZE(ud->active_threads)) {
LOG_WRN("Thread could exceeds tha max threads %u >= %u",
ud->thread_count, ARRAY_SIZE(ud->active_threads));
return 0;
}
/* Mark the thread as active */
ud->active_threads[ud->thread_count] = tid;
/* look for cached value from previous round for 'tid'-thread */
for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
if (ud->previous->threads[i].tid == tid) {
/* Calculate the number of cycles since the previous round */
uint32_t cycles = (uint32_t) (thread_stats->execution_cycles -
ud->previous->threads[i].cycles);
LOG_DBG("%p found at %d (%s %llu)", tid, i,
name, thread_stats->execution_cycles);
/* update cached value */
ud->previous->threads[i].cycles = thread_stats->execution_cycles;
return cycles;
}
}
/* If no cached value was found, look for an empty slot to store the recent value */
for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
if (ud->previous->threads[i].tid == NULL) {
ud->previous->threads[i].tid = tid;
ud->previous->threads[i].cycles = thread_stats->execution_cycles;
LOG_DBG("%p placed at %d (%s %llu)", tid, i,
name, ud->previous->threads[i].cycles);
break;
}
}
/* If there are more than THREAD_INFO_MAX_THREADS threads on this core */
if (i == ARRAY_SIZE(ud->previous->threads))
LOG_WRN("No place found for %s %p", name, tid);
/* If there was no previous counter value to compare, return 0 cycles. */
return 0;
}
static uint8_t thread_info_cpu_utilization(struct k_thread *thread,
struct user_data *ud, const char *name)
{
k_thread_runtime_stats_t thread_stats;
uint32_t cycles;
if (!ud->stats_valid)
return 0;
if (k_thread_runtime_stats_get(thread, &thread_stats) != 0)
return 0;
cycles = thread_info_get_cycles(thread, &thread_stats, ud, name);
LOG_DBG("thread %s %u / %u", name, cycles, ud->all_cycles);
return (uint8_t) ((255llu * cycles) / ud->all_cycles);
}
#else
static uint8_t thread_info_cpu_utilization(struct k_thread *thread,
struct user_data *ud, const char *name)
{
return 0;
}
#endif
#ifdef CONFIG_THREAD_STACK_INFO
static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name)
{
size_t stack_size, stack_unused;
int ret;
stack_size = thread->stack_info.size;
ret = k_thread_stack_space_get(thread, &stack_unused);
if (ret) {
LOG_ERR(" %-20s: unable to get stack space (%d)",
name, ret);
stack_unused = 0;
}
return (UINT8_MAX * (stack_size - stack_unused)) / stack_size;
}
#else
static uint8_t thread_info_stack_level(struct k_thread *thread, const char *name)
{
return 0;
}
#endif
static int thread_info_buf_realloc(struct record_buf *bufd, size_t req_size)
{
size_t size = bufd->size;
while (size < bufd->w_ptr + req_size)
size *= 2;
if (size != bufd->size) {
uint8_t *buf = rmalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM, size);
if (!buf)
return -ENOMEM;
memcpy_s(buf, size, bufd->buf, bufd->w_ptr);
rfree(bufd->buf);
bufd->size = size;
bufd->buf = buf;
}
return 0;
}
static void thread_info_cb(const struct k_thread *cthread, void *user_data)
{
struct k_thread *thread = (struct k_thread *)cthread;
struct user_data *ud = user_data;
struct thread_info *tinfo;
const char *name;
name = k_thread_name_get((k_tid_t)thread);
if (!name || name[0] == '\0') {
size_t ptr_str_len = 11; /* length of "0x12345678\0" */
if (thread_info_buf_realloc(ud->bufd, sizeof(*tinfo) + ptr_str_len))
return;
tinfo = (struct thread_info *) &ud->bufd->buf[ud->bufd->w_ptr];
snprintk(tinfo->name, ptr_str_len, "%p", (void *)thread);
/* Drop the terminating '\0', that is why -1 is here. */
tinfo->name_len = ptr_str_len - 1;
ud->bufd->w_ptr += sizeof(*tinfo) + ptr_str_len - 1;
} else {
size_t len = strlen(name);
if (thread_info_buf_realloc(ud->bufd, sizeof(*tinfo) + len + 1))
return;
tinfo = (struct thread_info *) &ud->bufd->buf[ud->bufd->w_ptr];
strncpy(tinfo->name, name, len + 1);
tinfo->name_len = len;
ud->bufd->w_ptr += sizeof(*tinfo) + len;
}
tinfo->stack_usage = thread_info_stack_level(thread, name);
tinfo->cpu_usage = thread_info_cpu_utilization(thread, ud, name);
LOG_DBG("core %u %s stack %u%% cpu %u%%", ud->core,
tinfo->name, tinfo->stack_usage * 100U / 255,
tinfo->cpu_usage * 100U / 255);
ud->thread_count++;
}
#ifdef CONFIG_THREAD_RUNTIME_STATS
/* Marks the cached thread cycle counter entries of removed threads as
* free. This also happens to threads pinned to a CPU other than the
* primary one: in the beginning they are listed on the primary CPU,
* until the pinned CPU is started up, the thread is executed for the
* first time, and it moves to the CPU it is pinned on.
*/
static void cleanup_old_thread_cycles(struct user_data *ud)
{
int i, j;
for (i = 0; i < ARRAY_SIZE(ud->previous->threads); i++) {
bool found = false;
/* This entry is already free, continue */
if (ud->previous->threads[i].tid == NULL)
continue;
/* Check if the cached thread was seen on this round */
for (j = 0; j < ud->thread_count; j++) {
if (ud->active_threads[j] == ud->previous->threads[i].tid) {
found = true;
break;
}
}
/* If the thread is no longer active, mark the entry free */
if (!found) {
ud->previous->threads[i].tid = NULL;
ud->previous->threads[i].cycles = 0;
}
}
}
#else
static void cleanup_old_thread_cycles(struct user_data *ud) { }
#endif
static void thread_info_get(int core, struct record_buf *bufd)
{
k_thread_runtime_stats_t core_stats;
struct user_data ud = {
.core = core,
.bufd = bufd,
.thread_count = 0,
#ifdef CONFIG_THREAD_RUNTIME_STATS
.active_threads = { NULL },
.previous = &previous[core],
#endif
};
struct thread_info_record_hdr *hdr;
uint8_t load = 0;
#ifdef CONFIG_THREAD_RUNTIME_STATS
int ret = k_thread_runtime_stats_cpu_get(core, &core_stats);
if (ret == 0) {
uint32_t active_cycles = (uint32_t) (core_stats.total_cycles -
ud.previous->active);
uint32_t all_cycles = (uint32_t) (core_stats.execution_cycles -
ud.previous->all);
ud.stats_valid = true;
load = (uint8_t) ((255LLU * active_cycles) / all_cycles);
LOG_DBG("Core %u load %u / %u total %llu / %llu", core,
active_cycles, all_cycles,
core_stats.total_cycles, core_stats.execution_cycles);
ud.previous->active = core_stats.total_cycles;
ud.previous->all = core_stats.execution_cycles;
ud.all_cycles = all_cycles;
}
#endif
hdr = (struct thread_info_record_hdr *) bufd->buf;
bufd->w_ptr = sizeof(*hdr);
hdr->hdr.id = DEBUG_STREAM_RECORD_ID_THREAD_INFO;
hdr->load = load;
/* This is a best-effort debug tool; the unlocked version should be fine. */
k_thread_foreach_unlocked_filter_by_cpu(core, thread_info_cb, &ud);
cleanup_old_thread_cycles(&ud);
hdr->thread_count = ud.thread_count;
hdr->hdr.size_words = SOF_DIV_ROUND_UP(bufd->w_ptr, sizeof(hdr->hdr.data[0]));
debug_stream_slot_send_record(&hdr->hdr);
}
static void thread_info_run(void *cnum, void *a, void *b)
{
int core = (int) cnum;
struct record_buf bufd = {
.size = THREAD_INFO_INITIAL_RECORD_BUFFER_SIZE,
.w_ptr = 0,
};
bufd.buf = rmalloc(SOF_MEM_ZONE_RUNTIME, 0, SOF_MEM_CAPS_RAM, bufd.size);
if (!bufd.buf) {
LOG_ERR("malloc failed");
return;
}
for (;;) {
thread_info_get(core, &bufd);
k_sleep(K_SECONDS(CONFIG_SOF_DEBUG_STREAM_THREAD_INFO_INTERVAL));
}
}
#define THREAD_STACK_SIZE (2048)
static K_THREAD_STACK_ARRAY_DEFINE(info_thread_stacks, CONFIG_MP_MAX_NUM_CPUS,
THREAD_STACK_SIZE);
static struct k_thread info_thread[CONFIG_MP_MAX_NUM_CPUS];
static int thread_info_start(void)
{
uint32_t i;
for (i = 0; i < ARRAY_SIZE(info_thread); i++) {
char name[24];
k_tid_t tid;
int ret;
tid = k_thread_create(&info_thread[i], info_thread_stacks[i],
THREAD_STACK_SIZE, thread_info_run,
(void *) i, NULL, NULL,
K_LOWEST_APPLICATION_THREAD_PRIO, 0,
K_FOREVER);
if (!tid) {
LOG_ERR("k_thread_create() failed for core %u", i);
continue;
}
ret = k_thread_cpu_pin(tid, i);
if (ret < 0) {
LOG_ERR("Pinning thread to code core %u", i);
k_thread_abort(tid);
continue;
}
snprintf(name, sizeof(name), "%u thread info", i);
ret = k_thread_name_set(tid, name);
if (ret < 0)
LOG_INF("k_thread_name_set failed: %d for %u", ret, i);
k_thread_start(tid);
LOG_DBG("Thread %p for core %u started", tid, i);
}
return 0;
}
SYS_INIT(thread_info_start, APPLICATION, CONFIG_KERNEL_INIT_PRIORITY_DEFAULT);

@@ -0,0 +1,33 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright(c) 2024 Intel Corporation.
*/
#ifndef __SOC_DEBUG_STREAM_THREAD_INFO_H__
#define __SOC_DEBUG_STREAM_THREAD_INFO_H__
#include <user/debug_stream_slot.h>
/*
* Debug Stream Thread Info record header, including the CPU load of the
* core and the number of threads. The header is immediately followed by
* thread_count thread_info fields.
*/
struct thread_info_record_hdr {
struct debug_stream_record hdr;
uint8_t load; /* Core's load U(0,8) fixed point value */
uint8_t thread_count;
} __packed;
/*
* Debug Stream Thread Info field for a single thread.
*/
struct thread_info {
uint8_t stack_usage; /* Relative stack usage U(0,8) fixed point value */
uint8_t cpu_usage; /* Relative cpu usage U(0,8) fixed point value */
uint8_t name_len; /* Length of name string */
char name[];
} __packed;
#endif /* __SOC_DEBUG_STREAM_THREAD_INFO_H__ */
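
To make the record encoding concrete, a reader-side walk over one Thread Info record could look roughly like the sketch below. This is a minimal illustration and not part of the commit: the parse_thread_info_record() helper is hypothetical, buffer bounds checking is omitted, and the record is assumed to be available in contiguous memory with the header above included. Note that the name strings are not guaranteed to be NUL terminated in the record, so name_len must be used, and the U(0,8) values map to percentages as value * 100 / 255.

#include <stdint.h>
#include <stdio.h>
#include <user/debug_stream_thread_info.h>

/* Hypothetical reader-side helper: prints the core load and then walks
 * the variable-length thread_info fields that follow the header. */
static void parse_thread_info_record(const struct thread_info_record_hdr *hdr)
{
	const uint8_t *pos = (const uint8_t *)hdr + sizeof(*hdr);
	unsigned int i;

	printf("core load %u%%, %u threads\n",
	       hdr->load * 100U / 255U, hdr->thread_count);

	for (i = 0; i < hdr->thread_count; i++) {
		const struct thread_info *ti = (const struct thread_info *)pos;

		/* name_len bytes of name, no terminating NUL in the record */
		printf("  %.*s: stack %u%% cpu %u%%\n",
		       ti->name_len, ti->name,
		       ti->stack_usage * 100U / 255U,
		       ti->cpu_usage * 100U / 255U);

		/* The next field starts right after this name */
		pos += sizeof(*ti) + ti->name_len;
	}
}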