incubator-nuttx/sched/instrument/profile_monitor.c

585 lines
16 KiB
C

/****************************************************************************
* sched/instrument/profile_monitor.c
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership. The
* ASF licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the
* License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
****************************************************************************/
/****************************************************************************
* Included Files
****************************************************************************/
#include <debug.h>
#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/gmon.h>
#include <nuttx/arch.h>
#include <nuttx/init.h>
#include <nuttx/kmalloc.h>
#include <nuttx/fs/fs.h>
#include <nuttx/spinlock.h>
/****************************************************************************
* Pre-processor Definitions
****************************************************************************/
#define GMONVERSION 0x00051879
/* Histogram counters are unsigned shorts (according to the kernel). */
#define HISTCOUNTER unsigned short
/* Fraction of text space to allocate for histogram counters here, 1/2 */
#define HISTFRACTION 2
/* Fraction of text space to allocate for from hash buckets.
* The value of HASHFRACTION is based on the minimum number of bytes
* of separation between two subroutine call points in the object code.
* Given MIN_SUBR_SEPARATION bytes of separation the value of
* HASHFRACTION is calculated as:
*
* HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
*
* For example, on the VAX, the shortest two call sequence is:
*
* calls $0,(r0)
* calls $0,(r0)
*
* Which is separated by only three bytes, thus HASHFRACTION is
* calculated as:
*
* HASHFRACTION = 3 / (2 * 2 - 1) = 1
*
* Note that the division above rounds down, thus if MIN_SUBR_FRACTION
* is less than three, this algorithm will not work!
*
* In practice, however, call instructions are rarely at a minimal
* distance. Hence, we will define HASHFRACTION to be 2 across all
* architectures. This saves a reasonable amount of space for
* profiling data structures without (in practice) sacrificing
* any granularity.
*/
#define HASHFRACTION 2
/* Percent of text space to allocate for tostructs with a minimum.
* This is a heuristic; we will fail with a warning when profiling
* programs with a very large number of very small functions, but
* that's normally OK.
* 2 is probably still a good value for normal programs.
* Profiling a test case with 64000 small functions will work if
* you raise this value to 3 and link statically (which bloats the
* text size, thus raising the number of arcs expected by the heuristic).
*/
#define ARCDENSITY 3
/* Always allocate at least this many tostructs. This
* hides the inadequacy of the ARCDENSITY heuristic, at least
* for small programs.
*/
#define MINARCS 50
/* The type used to represent indices into gmonparam.tos[]. */
#define ARCINDEX unsigned long
/* Maximum number of arcs we want to allow.
* Used to be max representable value of ARCINDEX minus 2, but now
* that ARCINDEX is a long, that's too large; we don't really want
* to allow a 48 gigabyte table.
*/
#define MAXARCS (1 << 20)
/* General rounding functions. */
#define ROUNDDOWN(x, y) (((x) / (y)) * (y))
#define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y))
/* See profil(2) where this is described (incorrectly) */
#define SCALE_1_TO_1 0x10000
/****************************************************************************
* Private Types
****************************************************************************/
struct tostruct
{
uintptr_t selfpc; /* Callee address/program counter. The caller address
* is in froms[] array which points to tos[] array
*/
long count; /* How many times it has been called */
ARCINDEX link; /* Link to next entry in hash table. For tos[0] this
* points to the last used entry
*/
};
/* Structure prepended to gmon.out profiling data file. */
struct gmonhdr
{
uintptr_t lpc; /* Base pc address of sample buffer */
uintptr_t hpc; /* Max pc address of sampled buffer */
uint32_t ncnt; /* Size of sample buffer (plus this header) */
uint32_t version; /* Version number */
uint32_t profrate; /* Profiling clock rate */
uint32_t spare[3]; /* Reserved */
};
/* A raw arc, with pointers to the calling site and
* the called site and a count.
*/
struct rawarc
{
uintptr_t raw_frompc;
uintptr_t raw_selfpc;
long raw_count;
};
/* The profiling data structures are housed in this structure. */
struct gmonparam
{
bool running;
FAR unsigned short *kcount; /* Histogram PC sample array */
size_t kcountsize; /* Size of kcount[] array in bytes */
FAR ARCINDEX *froms; /* Array of hashed 'from' addresses. The 16bit
* value is an index into the tos[] array
*/
size_t fromssize; /* Size of froms[] array in bytes */
FAR struct tostruct *tos; /* To struct, contains histogram counter */
size_t tossize; /* Size of tos[] array in bytes */
size_t tolimit;
uintptr_t lowpc; /* Low program counter of area */
uintptr_t highpc; /* High program counter */
size_t textsize; /* Code size */
spinlock_t lock; /* Lock for this structure */
};
/****************************************************************************
* Private Data
****************************************************************************/
static struct gmonparam g_monparam;
/****************************************************************************
* Public Data
****************************************************************************/
extern uint8_t _stext[];
extern uint8_t _etext[];
/****************************************************************************
* Private Functions
****************************************************************************/
noinstrument_function
static int write_gmon(FAR struct gmonparam *p, FAR const char *output)
{
struct gmonhdr gmonhdr;
struct rawarc rawarc;
struct file file;
uintptr_t frompc;
ARCINDEX toindex;
size_t fromindex;
size_t endfrom;
int ret;
ret = file_open(&file, output, O_CREAT | O_TRUNC | O_WRONLY, 0666);
if (ret < 0)
{
serr("cannot open %s\n", output);
return ret;
}
gmonhdr.lpc = p->lowpc;
gmonhdr.hpc = p->highpc;
gmonhdr.ncnt = sizeof(gmonhdr) + p->kcountsize;
gmonhdr.version = GMONVERSION;
gmonhdr.profrate = CONFIG_SCHED_PROFILE_TICKSPERSEC;
ret = file_write(&file, &gmonhdr, sizeof(gmonhdr));
if (ret != sizeof(gmonhdr))
{
serr("write gmonhdr failed\n");
goto out;
}
ret = file_write(&file, p->kcount, p->kcountsize);
if (ret != p->kcountsize)
{
serr("write kcount failed\n");
goto out;
}
endfrom = p->fromssize / sizeof(*p->froms);
for (fromindex = 0; fromindex < endfrom; fromindex++)
{
if (p->froms[fromindex] == 0)
{
continue;
}
frompc = p->lowpc;
frompc += fromindex * HASHFRACTION * sizeof(*p->froms);
for (toindex = p->froms[fromindex]; toindex != 0;
toindex = p->tos[toindex].link)
{
rawarc.raw_frompc = frompc;
rawarc.raw_selfpc = p->tos[toindex].selfpc;
rawarc.raw_count = p->tos[toindex].count;
ret = file_write(&file, &rawarc, sizeof(rawarc));
if (ret != sizeof(rawarc))
{
serr("write rawarc failed\n");
goto out;
}
}
}
out:
file_close(&file);
return ret < 0 ? ret : 0;
}
/****************************************************************************
* Public Functions
****************************************************************************/
/* Control profiling
* profiling is what mcount checks to see if
* all the data structures are ready.
*/
noinstrument_function
void moncontrol(int mode)
{
FAR struct gmonparam *p = &g_monparam;
irqstate_t flags;
if (p->running == !!mode)
{
return;
}
if (mode)
{
uintptr_t lowpc = ROUNDDOWN((uintptr_t)&_stext,
HISTFRACTION * sizeof(HISTCOUNTER));
uintptr_t highpc = ROUNDUP((uintptr_t)&_etext,
HISTFRACTION * sizeof(HISTCOUNTER));
size_t textsize = highpc - lowpc;
size_t kcountsize = ROUNDUP(textsize / HISTFRACTION,
sizeof(*p->kcount));
int scale = kcountsize >= textsize ? SCALE_1_TO_1 :
(float)kcountsize / textsize * SCALE_1_TO_1;
FAR unsigned short *kcount = kmm_zalloc(kcountsize);
if (kcount == NULL)
{
serr("out of memory\n");
return;
}
flags = spin_lock_irqsave(&p->lock);
if (p->kcount)
{
spin_unlock_irqrestore(&p->lock, flags);
kmm_free(kcount);
return;
}
p->running = true;
p->lowpc = lowpc;
p->highpc = highpc;
p->textsize = textsize;
p->kcount = kcount;
p->kcountsize = kcountsize;
spin_unlock_irqrestore(&p->lock, flags);
profil(kcount, kcountsize, lowpc, scale);
}
else
{
bool running;
flags = spin_lock_irqsave(&p->lock);
running = p->running;
p->running = false;
spin_unlock_irqrestore(&p->lock, flags);
if (running)
{
profil(NULL, 0, 0, 0);
}
}
}
noinstrument_function
void monstartup(unsigned long lowpc, unsigned long highpc)
{
FAR struct gmonparam *p = &g_monparam;
irqstate_t flags;
FAR char *buffer;
size_t textsize;
size_t fromssize;
size_t tolimit;
size_t tossize;
/* If we are incorrectly called twice in a row (without an
* intervening call to _mcleanup), ignore the second call to
* prevent leaking memory.
*/
if (p->tos != NULL)
{
return;
}
/* Return if the allocation doesn't allow in the current context */
if (!OSINIT_OS_READY() || up_interrupt_context())
{
return;
}
/* Round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
*/
lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
textsize = highpc - lowpc;
fromssize = ROUNDUP(textsize / HASHFRACTION, sizeof(*p->froms));
tolimit = textsize * ARCDENSITY / 100;
if (tolimit < MINARCS)
{
tolimit = MINARCS;
}
else if (tolimit > MAXARCS)
{
tolimit = MAXARCS;
}
tossize = tolimit * sizeof(struct tostruct);
buffer = kmm_zalloc(fromssize + tossize);
if (buffer == NULL)
{
serr("out of memory\n");
return;
}
flags = spin_lock_irqsave(&p->lock);
if (p->tos != NULL)
{
spin_unlock_irqrestore(&p->lock, flags);
kmm_free(buffer);
return;
}
p->lowpc = lowpc;
p->highpc = highpc;
p->textsize = textsize;
p->fromssize = fromssize;
p->tolimit = tolimit;
p->tossize = tossize;
p->tos = (FAR struct tostruct *)buffer;
buffer += p->tossize;
p->froms = (FAR ARCINDEX *)buffer;
spin_unlock_irqrestore(&p->lock, flags);
moncontrol(1);
}
noinstrument_function
void _mcleanup(void)
{
FAR struct gmonparam *p = &g_monparam;
FAR const char *prefix = NULL;
#ifndef CONFIG_DISABLE_ENVIRON
prefix = getenv("GMON_OUT_PREFIX");
#endif
if (prefix == NULL)
{
prefix = "gmon.out";
}
moncontrol(0);
if (p->kcount)
{
write_gmon(p, prefix);
}
kmm_free(p->tos);
kmm_free(p->kcount);
/* Reset buffer to initial state for safety */
memset(p, 0, sizeof(*p));
}
/* mcount_internal is called on entry to each function compiled with
* the profiling switch set by an assembly stub in:
* libs/libc/machine/xxx/mcount.S
* which updates data structures that represent traversals of the
* program's call graph edges. frompc and selfpc are the return
* address and function address that represents the given call graph edge.
*/
noinstrument_function
void mcount_internal(uintptr_t frompc, uintptr_t selfpc)
{
FAR struct gmonparam *p = &g_monparam;
FAR struct tostruct *prevtop;
FAR struct tostruct *top;
FAR ARCINDEX *frompcindex;
ARCINDEX toindex;
irqstate_t flags;
/* Check that we are profiling */
if (!p->running)
{
return;
}
/* Initialize the internal structure if not yet */
monstartup((uintptr_t)&_stext, (uintptr_t)&_etext);
flags = spin_lock_irqsave(&p->lock);
/* Try next time if fail to initialize for some reason */
if (p->tos == NULL)
{
goto done;
}
/* Check that frompc is a reasonable pc value.
* For example: signal catchers get called from the stack,
* not from text space. Too bad.
*/
frompc -= p->lowpc;
if (frompc > p->textsize)
{
goto done;
}
frompcindex = &p->froms[frompc / (HASHFRACTION * sizeof(*p->froms))];
toindex = *frompcindex; /* Get froms[] value */
if (toindex == 0)
{
/* First time traversing this arc */
toindex = ++p->tos[0].link; /* The link of tos[0] points to the last
* used record in the array
*/
if (toindex >= p->tolimit)
{
/* More tos[] entries than we can handle! */
goto done;
}
/* Store new 'to' value into froms[] */
*frompcindex = toindex;
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = 0;
goto done;
}
top = &p->tos[toindex];
if (top->selfpc == selfpc)
{
/* Arc at front of chain; usual case. */
top->count++;
goto done;
}
/* Have to go looking down chain for it.
* Top points to what we are looking at,
* prevtop points to previous top.
* We know it is not at the head of the chain.
*/
for (; ; )
{
if (top->link == 0)
{
/* Top is end of the chain and none of the chain
* had top->selfpc == selfpc.
* So we allocate a new tostruct
* and link it to the head of the chain.
*/
toindex = ++p->tos[0].link;
if (toindex >= p->tolimit)
{
goto done;
}
top = &p->tos[toindex];
top->selfpc = selfpc;
top->count = 1;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
/* Otherwise, check the next arc on the chain. */
prevtop = top;
top = &p->tos[top->link];
if (top->selfpc == selfpc)
{
/* There it is.
* Increment its count
* move it to the head of the chain.
*/
top->count++;
toindex = prevtop->link;
prevtop->link = top->link;
top->link = *frompcindex;
*frompcindex = toindex;
goto done;
}
}
done:
spin_unlock_irqrestore(&p->lock, flags);
}