sched: Implement profil function

which is very useful for performance analysis:
https://man7.org/linux/man-pages/man3/profil.3.html

Signed-off-by: Xiang Xiao <xiaoxiang@xiaomi.com>
This commit is contained in:
Xiang Xiao 2024-03-12 01:48:22 +08:00 committed by Xiang Xiao
parent f774afb4d9
commit 4944bfd56c
5 changed files with 157 additions and 1 deletions

View File

@ -466,6 +466,9 @@ int getentropy(FAR void *buffer, size_t length);
void sync(void);
int syncfs(int fd);
int profil(FAR unsigned short *buf, size_t bufsiz,
size_t offset, unsigned int scale);
#if CONFIG_FORTIFY_SOURCE > 0
fortify_function(getcwd) FAR char *getcwd(FAR char *buf,
size_t size)

View File

@ -1162,6 +1162,13 @@ config SCHED_CPULOAD_TIMECONSTANT
tick count exceeds this time constant. This time constant is in
units of seconds.
config SCHED_PROFILE_TICKSPERSEC
int "Profile sampling rate"
default 1000
---help---
This is the frequency at which the profil function will sample the
running program. The default is 1000Hz.
menuconfig SCHED_INSTRUMENTATION
bool "System performance monitor hooks"
default n

View File

@ -21,6 +21,7 @@
# ##############################################################################
set(SRCS
sched_getfiles.c
sched_profil.c
sched_addreadytorun.c
sched_removereadytorun.c
sched_addprioritized.c

View File

@ -20,7 +20,7 @@
#
############################################################################
CSRCS += sched_getfiles.c
CSRCS += sched_getfiles.c sched_profil.c
CSRCS += sched_addreadytorun.c sched_removereadytorun.c
CSRCS += sched_addprioritized.c sched_mergeprioritized.c sched_mergepending.c
CSRCS += sched_addblocked.c sched_removeblocked.c

145
sched/sched/sched_profil.c Normal file
View File

@ -0,0 +1,145 @@
/****************************************************************************
* sched/sched/sched_profil.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <nuttx/arch.h>
#include <nuttx/wdog.h>
#include <nuttx/spinlock.h>
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
/* Delay handed to wd_start() between two PC samples: one second
 * (NSEC_PER_SEC) divided by CONFIG_SCHED_PROFILE_TICKSPERSEC, passed through
 * NSEC2TICK (presumably a nanoseconds-to-system-ticks conversion; confirm
 * against its definition).
 */
#define PROFTICK NSEC2TICK(NSEC_PER_SEC / CONFIG_SCHED_PROFILE_TICKSPERSEC)
/****************************************************************************
* Private Types
****************************************************************************/
/* Description of a profiling session: the histogram being filled, the PC
 * window it covers, and the timer and lock used while sampling.
 */
struct profinfo_s
{
/* Histogram supplied by the caller of profil(); the timer handler increments
 * one entry for every sampled PC that falls inside [lowpc, highpc).
 */
FAR unsigned short *counter;
/* Lowest PC that is counted (inclusive); set from profil()'s offset */
uintptr_t lowpc;
/* End of the counted PC window (exclusive) */
uintptr_t highpc;
/* Bin scale: a half-word offset from lowpc is multiplied by scale and
 * divided by 65536 to obtain the histogram index.
 */
unsigned int scale;
/* Watchdog that the handler re-arms every PROFTICK to take the next sample */
struct wdog_s timer;
/* Spinlock taken by profil() while updating the fields above and by the
 * timer handler while reading them and bumping the histogram.
 */
spinlock_t lock;
};
/****************************************************************************
* Private Data
****************************************************************************/
/* The one profiling context of this file: profil() fills it in and passes
 * its address to the sampling timer handler.
 */
static struct profinfo_s g_prof;
/****************************************************************************
* Private Functions
****************************************************************************/
/****************************************************************************
 * Name: profil_timer_handler
 *
 * Description:
 *   Watchdog callback that records one PC sample and then re-arms itself.
 *   The sampled PC is obtained from up_getusrpc(NULL) (presumably the PC of
 *   the interrupted context; confirm against the architecture code).  When
 *   it lies inside [lowpc, highpc) the matching histogram bin is bumped.
 *
 * Input Parameters:
 *   arg - The struct profinfo_s pointer that was passed to wd_start()
 *
 ****************************************************************************/

static void profil_timer_handler(wdparm_t arg)
{
  FAR struct profinfo_s *info = (FAR struct profinfo_s *)(uintptr_t)arg;
  uintptr_t sample = up_getusrpc(NULL);
  irqstate_t irqsave;

  /* The window, scale and histogram may be rewritten by profil(), so they
   * are only touched while holding the lock.
   */

  irqsave = spin_lock_irqsave(&info->lock);

  if (sample >= info->lowpc && sample < info->highpc)
    {
      /* Offset from the start of the window, counted in 2-byte units */

      size_t halfword = (sample - info->lowpc) / 2;
      size_t bin;

#if UINTMAX_MAX > SIZE_MAX
      /* A wider type exists, so the product cannot overflow */

      bin = (uintmax_t)halfword * info->scale / 65536;
#else
      /* No wider type: scale the high and low 16-bit parts separately */

      bin = halfword / 65536 * info->scale +
            halfword % 65536 * info->scale / 65536;
#endif

      info->counter[bin]++;
    }

  spin_unlock_irqrestore(&info->lock, irqsave);

  /* Schedule the next sample */

  wd_start(&info->timer, PROFTICK, profil_timer_handler, arg);
}
/****************************************************************************
* Public Functions
****************************************************************************/
/****************************************************************************
 * Name: profil
 *
 * Description:
 *   This routine provides a means to find out in what areas your
 *   program spends most of its time.  The argument buf points to
 *   bufsiz bytes of core, used as an array of unsigned short counters.
 *   The program counter (PC) is examined
 *   CONFIG_SCHED_PROFILE_TICKSPERSEC times in every second: offset is
 *   subtracted, the result is halved, multiplied by scale and divided
 *   by 65536.  If the resulting index addresses a counter inside buf,
 *   that counter is incremented.
 *
 *   If buf is NULL (or scale is zero), profiling is disabled.
 *
 * Input Parameters:
 *   buf    - Buffer to record the hitting count; it is zeroed on start
 *   bufsiz - Size of buffer in bytes
 *   offset - The lowest address to be sampled
 *   scale  - Multiply address by scale / 65536; must not exceed 65536
 *
 * Returned Value:
 *   Zero (OK) if successful.  Otherwise, ERROR is returned and
 *   errno is set to indicate the error (EINVAL when scale > 65536).
 *
 ****************************************************************************/

int profil(FAR unsigned short *buf, size_t bufsiz,
           size_t offset, unsigned int scale)
{
  FAR struct profinfo_s *prof = &g_prof;
  irqstate_t flags;
  uintptr_t lowpc = (uintptr_t)offset;
  uintmax_t span;
  size_t nbins;

  /* Disabling is checked first so that profil(NULL, ...) always stops the
   * sampling timer, whatever the other arguments contain.
   */

  if (buf == NULL || scale == 0)
    {
      wd_cancel(&prof->timer);
      return OK;
    }

  if (scale > 65536)
    {
      set_errno(EINVAL);
      return ERROR;
    }

  memset(buf, 0, bufsiz);

  /* The timer handler maps a PC to bin ((pc - lowpc) / 2) * scale / 65536.
   * For that index to stay below nbins the window must span
   * nbins * 2 * 65536 / scale bytes.  Only whole counters are considered,
   * so a trailing odd byte of buf is never written.
   */

  nbins = bufsiz / sizeof(*buf);
  span  = (uintmax_t)nbins * 131072 / scale;

  /* Keep the end of the window representable: without this clamp a large
   * span would be truncated or lowpc + span would wrap around, leaving a
   * wrong (possibly empty) window.  Shrinking the span is always safe.
   */

  if (span > UINTPTR_MAX - lowpc)
    {
      span = UINTPTR_MAX - lowpc;
    }

  flags = spin_lock_irqsave(&prof->lock);
  prof->counter = buf;
  prof->lowpc   = lowpc;
  prof->highpc  = lowpc + (uintptr_t)span;
  prof->scale   = scale;
  spin_unlock_irqrestore(&prof->lock, flags);

  /* (Re)start periodic sampling */

  wd_start(&prof->timer, PROFTICK, profil_timer_handler,
           (wdparm_t)(uintptr_t)prof);
  return OK;
}