/**************************************************************************** * sched/instrument/profile_monitor.c * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. The * ASF licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the * License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations * under the License. * ****************************************************************************/ /**************************************************************************** * Included Files ****************************************************************************/ #include #include #include #include #include #include #include #include #include #include #include #include #include /**************************************************************************** * Pre-processor Definitions ****************************************************************************/ #define GMONVERSION 0x00051879 /* Histogram counters are unsigned shorts (according to the kernel). */ #define HISTCOUNTER unsigned short /* Fraction of text space to allocate for histogram counters here, 1/2 */ #define HISTFRACTION 2 /* Fraction of text space to allocate for from hash buckets. * The value of HASHFRACTION is based on the minimum number of bytes * of separation between two subroutine call points in the object code. 
* Given MIN_SUBR_SEPARATION bytes of separation the value of
 * HASHFRACTION is calculated as:
 *
 *   HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
 *
 * For example, on the VAX, the shortest two call sequence is:
 *
 *   calls $0,(r0)
 *   calls $0,(r0)
 *
 * Which is separated by only three bytes, thus HASHFRACTION is
 * calculated as:
 *
 *   HASHFRACTION = 3 / (2 * 2 - 1) = 1
 *
 * Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
 * is less than three, this algorithm will not work!
 *
 * In practice, however, call instructions are rarely at a minimal
 * distance.  Hence, we will define HASHFRACTION to be 2 across all
 * architectures.  This saves a reasonable amount of space for
 * profiling data structures without (in practice) sacrificing
 * any granularity.
 */

#define HASHFRACTION 2

/* Percent of text space to allocate for tostructs with a minimum.
 * This is a heuristic; we will fail with a warning when profiling
 * programs with a very large number of very small functions, but
 * that's normally OK.
 * 2 is probably still a good value for normal programs.
 * Profiling a test case with 64000 small functions will work if
 * you raise this value to 3 and link statically (which bloats the
 * text size, thus raising the number of arcs expected by the heuristic).
 * The value used here is 3.
 */

#define ARCDENSITY 3

/* Always allocate at least this many tostructs.  This
 * hides the inadequacy of the ARCDENSITY heuristic, at least
 * for small programs.
 */

#define MINARCS 50

/* The type used to represent indices into gmonparam.tos[]. */

#define ARCINDEX unsigned long

/* Maximum number of arcs we want to allow.
 * Used to be max representable value of ARCINDEX minus 2, but now
 * that ARCINDEX is a long, that's too large; we don't really want
 * to allow a 48 gigabyte table.
 */

#define MAXARCS (1 << 20)

/* General rounding functions.
*/ #define ROUNDDOWN(x, y) (((x) / (y)) * (y)) #define ROUNDUP(x, y) ((((x) + (y) - 1) / (y)) * (y)) /* See profil(2) where this is described (incorrectly) */ #define SCALE_1_TO_1 0x10000 /**************************************************************************** * Private Types ****************************************************************************/ struct tostruct { uintptr_t selfpc; /* Callee address/program counter. The caller address * is in froms[] array which points to tos[] array */ long count; /* How many times it has been called */ ARCINDEX link; /* Link to next entry in hash table. For tos[0] this * points to the last used entry */ }; /* Structure prepended to gmon.out profiling data file. */ struct gmonhdr { uintptr_t lpc; /* Base pc address of sample buffer */ uintptr_t hpc; /* Max pc address of sampled buffer */ uint32_t ncnt; /* Size of sample buffer (plus this header) */ uint32_t version; /* Version number */ uint32_t profrate; /* Profiling clock rate */ uint32_t spare[3]; /* Reserved */ }; /* A raw arc, with pointers to the calling site and * the called site and a count. */ struct rawarc { uintptr_t raw_frompc; uintptr_t raw_selfpc; long raw_count; }; /* The profiling data structures are housed in this structure. */ struct gmonparam { bool running; FAR unsigned short *kcount; /* Histogram PC sample array */ size_t kcountsize; /* Size of kcount[] array in bytes */ FAR ARCINDEX *froms; /* Array of hashed 'from' addresses. 
The 16bit * value is an index into the tos[] array */ size_t fromssize; /* Size of froms[] array in bytes */ FAR struct tostruct *tos; /* To struct, contains histogram counter */ size_t tossize; /* Size of tos[] array in bytes */ size_t tolimit; uintptr_t lowpc; /* Low program counter of area */ uintptr_t highpc; /* High program counter */ size_t textsize; /* Code size */ spinlock_t lock; /* Lock for this structure */ }; /**************************************************************************** * Private Data ****************************************************************************/ static struct gmonparam g_monparam; /**************************************************************************** * Public Data ****************************************************************************/ extern uint8_t _stext[]; extern uint8_t _etext[]; /**************************************************************************** * Private Functions ****************************************************************************/ noinstrument_function static int write_gmon(FAR struct gmonparam *p, FAR const char *output) { struct gmonhdr gmonhdr; struct rawarc rawarc; struct file file; uintptr_t frompc; ARCINDEX toindex; size_t fromindex; size_t endfrom; int ret; ret = file_open(&file, output, O_CREAT | O_TRUNC | O_WRONLY, 0666); if (ret < 0) { serr("cannot open %s\n", output); return ret; } gmonhdr.lpc = p->lowpc; gmonhdr.hpc = p->highpc; gmonhdr.ncnt = sizeof(gmonhdr) + p->kcountsize; gmonhdr.version = GMONVERSION; gmonhdr.profrate = CONFIG_SCHED_PROFILE_TICKSPERSEC; ret = file_write(&file, &gmonhdr, sizeof(gmonhdr)); if (ret != sizeof(gmonhdr)) { serr("write gmonhdr failed\n"); goto out; } ret = file_write(&file, p->kcount, p->kcountsize); if (ret != p->kcountsize) { serr("write kcount failed\n"); goto out; } endfrom = p->fromssize / sizeof(*p->froms); for (fromindex = 0; fromindex < endfrom; fromindex++) { if (p->froms[fromindex] == 0) { continue; } frompc = p->lowpc; frompc += 
fromindex * HASHFRACTION * sizeof(*p->froms); for (toindex = p->froms[fromindex]; toindex != 0; toindex = p->tos[toindex].link) { rawarc.raw_frompc = frompc; rawarc.raw_selfpc = p->tos[toindex].selfpc; rawarc.raw_count = p->tos[toindex].count; ret = file_write(&file, &rawarc, sizeof(rawarc)); if (ret != sizeof(rawarc)) { serr("write rawarc failed\n"); goto out; } } } out: file_close(&file); return ret < 0 ? ret : 0; } /**************************************************************************** * Public Functions ****************************************************************************/ /* Control profiling * profiling is what mcount checks to see if * all the data structures are ready. */ noinstrument_function void moncontrol(int mode) { FAR struct gmonparam *p = &g_monparam; irqstate_t flags; if (p->running == !!mode) { return; } if (mode) { uintptr_t lowpc = ROUNDDOWN((uintptr_t)&_stext, HISTFRACTION * sizeof(HISTCOUNTER)); uintptr_t highpc = ROUNDUP((uintptr_t)&_etext, HISTFRACTION * sizeof(HISTCOUNTER)); size_t textsize = highpc - lowpc; size_t kcountsize = ROUNDUP(textsize / HISTFRACTION, sizeof(*p->kcount)); int scale = kcountsize >= textsize ? 
SCALE_1_TO_1 : (float)kcountsize / textsize * SCALE_1_TO_1; FAR unsigned short *kcount = kmm_zalloc(kcountsize); if (kcount == NULL) { serr("out of memory\n"); return; } flags = spin_lock_irqsave(&p->lock); if (p->kcount) { spin_unlock_irqrestore(&p->lock, flags); kmm_free(kcount); return; } p->running = true; p->lowpc = lowpc; p->highpc = highpc; p->textsize = textsize; p->kcount = kcount; p->kcountsize = kcountsize; spin_unlock_irqrestore(&p->lock, flags); profil(kcount, kcountsize, lowpc, scale); } else { bool running; flags = spin_lock_irqsave(&p->lock); running = p->running; p->running = false; spin_unlock_irqrestore(&p->lock, flags); if (running) { profil(NULL, 0, 0, 0); } } } noinstrument_function void monstartup(unsigned long lowpc, unsigned long highpc) { FAR struct gmonparam *p = &g_monparam; irqstate_t flags; FAR char *buffer; size_t textsize; size_t fromssize; size_t tolimit; size_t tossize; /* If we are incorrectly called twice in a row (without an * intervening call to _mcleanup), ignore the second call to * prevent leaking memory. */ if (p->tos != NULL) { return; } /* Return if the allocation doesn't allow in the current context */ if (!OSINIT_OS_READY() || up_interrupt_context()) { return; } /* Round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. 
*/ lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER)); highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER)); textsize = highpc - lowpc; fromssize = ROUNDUP(textsize / HASHFRACTION, sizeof(*p->froms)); tolimit = textsize * ARCDENSITY / 100; if (tolimit < MINARCS) { tolimit = MINARCS; } else if (tolimit > MAXARCS) { tolimit = MAXARCS; } tossize = tolimit * sizeof(struct tostruct); buffer = kmm_zalloc(fromssize + tossize); if (buffer == NULL) { serr("out of memory\n"); return; } flags = spin_lock_irqsave(&p->lock); if (p->tos != NULL) { spin_unlock_irqrestore(&p->lock, flags); kmm_free(buffer); return; } p->lowpc = lowpc; p->highpc = highpc; p->textsize = textsize; p->fromssize = fromssize; p->tolimit = tolimit; p->tossize = tossize; p->tos = (FAR struct tostruct *)buffer; buffer += p->tossize; p->froms = (FAR ARCINDEX *)buffer; spin_unlock_irqrestore(&p->lock, flags); moncontrol(1); } noinstrument_function void _mcleanup(void) { FAR struct gmonparam *p = &g_monparam; FAR const char *prefix = NULL; #ifndef CONFIG_DISABLE_ENVIRON prefix = getenv("GMON_OUT_PREFIX"); #endif if (prefix == NULL) { prefix = "gmon.out"; } moncontrol(0); if (p->kcount) { write_gmon(p, prefix); } kmm_free(p->tos); kmm_free(p->kcount); /* Reset buffer to initial state for safety */ memset(p, 0, sizeof(*p)); } /* mcount_internal is called on entry to each function compiled with * the profiling switch set by an assembly stub in: * libs/libc/machine/xxx/mcount.S * which updates data structures that represent traversals of the * program's call graph edges. frompc and selfpc are the return * address and function address that represents the given call graph edge. 
*/

/****************************************************************************
 * Name: mcount_internal
 *
 * Description:
 *   Record one traversal of the call-graph arc frompc -> selfpc.
 *
 *   The caller address is hashed into a froms[] slot; that slot holds the
 *   index of the first tos[] entry of a chain linked through 'link'.  The
 *   chain is searched for an entry whose selfpc matches:
 *     - found at the head: its count is incremented;
 *     - found further down: its count is incremented and it is moved to
 *       the head of the chain;
 *     - not found: a new tos[] entry with count 1 is taken (tos[0].link
 *       holds the index of the last used entry) and put at the head.
 *   When tos[] is full, or frompc lies outside the profiled text range,
 *   the call is dropped without being recorded.
 *
 *   Does nothing while profiling is not running.  Calls monstartup() on
 *   every invocation so that the tables get created once that becomes
 *   possible.
 *
 * Input Parameters:
 *   frompc - Return address in the caller
 *   selfpc - Address of the called function
 *
 ****************************************************************************/

noinstrument_function
void mcount_internal(uintptr_t frompc, uintptr_t selfpc)
{
  FAR struct gmonparam *p = &g_monparam;
  FAR struct tostruct *prevtop;
  FAR struct tostruct *top;
  FAR ARCINDEX *frompcindex;
  ARCINDEX toindex;
  irqstate_t flags;

  /* Check that we are profiling */

  if (!p->running)
    {
      return;
    }

  /* Initialize the internal structure if not yet */

  monstartup((uintptr_t)&_stext, (uintptr_t)&_etext);

  flags = spin_lock_irqsave(&p->lock);

  /* Try next time if fail to initialize for some reason */

  if (p->tos == NULL)
    {
      goto done;
    }

  /* Check that frompc is a reasonable pc value.
   * For example: signal catchers get called from the stack,
   * not from text space.  Too bad.
   *
   * The offset must be strictly below textsize: an offset equal to
   * textsize can index one element past the end of froms[].  Addresses
   * below lowpc wrap to a huge unsigned value and are rejected as well.
   */

  frompc -= p->lowpc;
  if (frompc >= p->textsize)
    {
      goto done;
    }

  frompcindex = &p->froms[frompc / (HASHFRACTION * sizeof(*p->froms))];
  toindex = *frompcindex; /* Get froms[] value */
  if (toindex == 0)
    {
      /* First time traversing this arc.  The link of tos[0] holds the
       * index of the last used record in the array; only advance it once
       * we know the next record exists, so that it can never run past
       * tolimit (and eventually wrap) while the table is full.
       */

      toindex = p->tos[0].link + 1;
      if (toindex >= p->tolimit)
        {
          /* More tos[] entries than we can handle! */

          goto done;
        }

      p->tos[0].link = toindex;

      /* Store new 'to' value into froms[] */

      *frompcindex = toindex;
      top = &p->tos[toindex];
      top->selfpc = selfpc;
      top->count = 1;
      top->link = 0;
      goto done;
    }

  top = &p->tos[toindex];
  if (top->selfpc == selfpc)
    {
      /* Arc at front of chain; usual case. */

      top->count++;
      goto done;
    }

  /* Have to go looking down chain for it.
   * Top points to what we are looking at,
   * prevtop points to previous top.
   * We know it is not at the head of the chain.
   */

  for (; ; )
    {
      if (top->link == 0)
        {
          /* Top is end of the chain and none of the chain
           * had top->selfpc == selfpc.
           * So we allocate a new tostruct
           * and link it to the head of the chain.
           */

          toindex = p->tos[0].link + 1;
          if (toindex >= p->tolimit)
            {
              goto done;
            }

          p->tos[0].link = toindex;

          top = &p->tos[toindex];
          top->selfpc = selfpc;
          top->count = 1;
          top->link = *frompcindex;
          *frompcindex = toindex;
          goto done;
        }

      /* Otherwise, check the next arc on the chain. */

      prevtop = top;
      top = &p->tos[top->link];
      if (top->selfpc == selfpc)
        {
          /* There it is.
           * Increment its count
           * move it to the head of the chain.
           */

          top->count++;
          toindex = prevtop->link;
          prevtop->link = top->link;
          top->link = *frompcindex;
          *frompcindex = toindex;
          goto done;
        }
    }

done:
  spin_unlock_irqrestore(&p->lock, flags);
}