1 //==========================================================================
5 // Application profiling support
7 //==========================================================================
8 //####ECOSGPLCOPYRIGHTBEGIN####
9 // -------------------------------------------
10 // This file is part of eCos, the Embedded Configurable Operating System.
11 // Copyright (C) 2003 eCosCentric Ltd.
12 // Copyright (C) 2002 Gary Thomas
14 // eCos is free software; you can redistribute it and/or modify it under
15 // the terms of the GNU General Public License as published by the Free
16 // Software Foundation; either version 2 or (at your option) any later version.
18 // eCos is distributed in the hope that it will be useful, but WITHOUT ANY
19 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
20 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
23 // You should have received a copy of the GNU General Public License along
24 // with eCos; if not, write to the Free Software Foundation, Inc.,
25 // 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
27 // As a special exception, if other files instantiate templates or use macros
28 // or inline functions from this file, or you compile this file and link it
29 // with other works to produce a work based on this file, this file does not
30 // by itself cause the resulting work to be covered by the GNU General Public
31 // License. However the source code for this file must still be made available
32 // in accordance with section (3) of the GNU General Public License.
34 // This exception does not invalidate any other reasons why a work based on
35 // this file might be covered by the GNU General Public License.
37 // Alternative licenses for eCos may be arranged by contacting Red Hat, Inc.
38 // at http://sources.redhat.com/ecos/ecos-license/
39 // -------------------------------------------
40 //####ECOSGPLCOPYRIGHTEND####
41 //==========================================================================
42 //#####DESCRIPTIONBEGIN####
44 // Author(s): Gary Thomas
45 // Contributors: Bart Veer
47 // Purpose: Application profiling support
50 //####DESCRIPTIONEND####
52 //===========================================================================
#include <pkgconf/system.h>
#include <pkgconf/profile_gprof.h>

#include <stdlib.h>
#include <string.h>
#include <cyg/infra/cyg_type.h>
#include <cyg/infra/diag.h>
#include <cyg/profile/profile.h>
#include <cyg/profile/gmon_out.h>

#ifdef CYGPKG_PROFILE_TFTP
# include <network.h>
# include <tftp_support.h>
#endif

69 // ----------------------------------------------------------------------------
70 // A gmon.out file starts with a struct gmon_hdr containing a cookie
71 // "gmon", a format version number, and some spare bytes. The structure
72 // is initialized by the profile_on() entry point so that it does not
73 // get garbage collected by the collector and hence a gdb script can
75 static struct gmon_hdr profile_gmon_hdr;
77 // The header is followed by data blocks. Each data block consists of a
78 // one-byte tag (HIST, ARC, or BB_COUNT), followed by data in a specific
80 static unsigned char profile_tags[3];
82 // The profiling data always contains histogram data. Typically an
83 // extra hardware timer is made to interrupt at the desired rate
84 // and stores the interrupted pc.
85 static struct gmon_hist_hdr profile_hist_hdr;
87 // The actual histogram counts. The file format only allows for 16-bit
88 // counts, which means overflow is a real possibility.
89 static cyg_uint16* profile_hist_data;
91 // Each slot in the histogram data covers a range of pc addresses,
92 // allowing a trade off between memory requirements and precision.
93 static int bucket_shift;
95 // Profiling is disabled on start-up and while a tftp transfer takes place.
96 static int profile_enabled;
98 // This is used by the gdb script to reset the profile data.
99 static int profile_reset_pending;
101 // The callgraph data. There is no header for this. Instead each non-zero
102 // entry is output separately, prefixed by an ARC tag. The data is accessed
103 // via a hash table/linked list combination. The tag is part of the
104 // structure to reduce the number of I/O operations needed for writing
108 unsigned char tags[4];
109 struct gmon_cg_arc_record record;
112 static struct profile_arc* profile_arc_records;
114 // The next free slot in the arc_records table.
115 static int profile_arc_next = 1;
#ifdef CYGPKG_PROFILE_CALLGRAPH
// The callgraph is accessed via a hash table. The hashing function is
// trivial, it just involves shifting an address an appropriate number
// of places. Each entry holds an index into profile_arc_records, with
// 0 meaning "no arcs recorded for this slot yet".
static int*         profile_arc_hashtable;

// The sizes of these tables
static int          profile_arc_hash_count;
static int          profile_arc_records_count;

// Is the table of arc records too small? Used for diagnostics.
static int          profile_arc_overflow;
#endif

131 // Reset current profiling data.
135 memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
137 #ifdef CYGPKG_PROFILE_CALLGRAPH
138 // Zeroing the callgraph can be achieved by zeroing the hash
139 // table and resetting the next field used for indexing into
140 // the arc data itself. Whenever an arc data slot is allocated
141 // the count and addresses are reset.
142 memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
143 profile_arc_next = 1;
144 profile_arc_overflow = 0;
148 // ----------------------------------------------------------------------------
149 // Accumulate profiling data.
151 // __profile_hit() will be called by HAL-specific code, typically in an ISR
152 // associated with a timer.
155 __profile_hit(CYG_ADDRWORD pc)
158 if (! profile_enabled ) {
159 if (! profile_reset_pending) {
162 // reset_pending can be set by the gdb script to request resetting
163 // the data. It avoids having to do lots of memory updates via the
164 // gdb protocol, which is too slow.
165 profile_reset_pending = 0;
170 if ((pc >= (CYG_ADDRWORD)profile_hist_hdr.low_pc) && (pc <= (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
171 bucket = (pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> bucket_shift;
172 if (profile_hist_data[bucket] < (unsigned short)0xFFFF) {
173 profile_hist_data[bucket]++;
#ifdef CYGPKG_PROFILE_CALLGRAPH
// __profile_mcount() will be called by the HAL-specific mcount() routine.
// When code is compiled with -pg the compiler inserts calls to mcount()
// at the start of each function. Typically mcount() will not use standard
// calling conventions so it has to be provided by the HAL.
//
// The from_pc/to_pc data should end up in profile_arc_records. A hash table
// maps a PC into a list chained through the records array. The hash function
// is a simple shift, so a range of PC addresses (usually 256 bytes) map
// onto a single linked list of arc records.
//
// We can hash on either the caller_pc, the callee_pc, or some combination.
// The caller PC will typically be in the middle of some function. The
// number of arcs that hash into the same list will depend on the number of
// function calls within a 256-byte region of code, multiplied by the
// number of different functions called at each location. The latter will
// be 1 unless the code uses changing function pointers. The callee pc
// is near the start of a function, and the number of hash collisions will
// depend on the number of places that function is called from. Usually this
// will be small, but some utility functions may be called from many different
// places.
//
// Hashing on the caller PC should give more deterministic results.
//
// On some targets the compiler does additional work. For example on
// the 68K in theory there is no need for a hash table because the
// compiler provides a word with each callee for the head of the
// linked list. It is not easy to cope with that in generic code, so
// for now this code ignores such compiler assistance.
//
// It is assumed that __profile_mcount() will be called with interrupts
// disabled.

// Claim the next free arc record and initialise it for the arc
// caller_pc -> callee_pc with a count of 1. Returns the index of the new
// record, or 0 (never a valid index, slot 0 is reserved) after setting
// profile_arc_overflow when the table is full.
static int
profile_arc_alloc(CYG_ADDRWORD caller_pc, CYG_ADDRWORD callee_pc)
{
    struct profile_arc* arc;
    int                 index;

    if (profile_arc_next >= profile_arc_records_count) {
        profile_arc_overflow = 1;
        return 0;
    }
    index               = profile_arc_next++;
    arc                 = &(profile_arc_records[index]);
    arc->next           = 0;
    arc->record.from_pc = (void*) caller_pc;
    arc->record.self_pc = (void*) callee_pc;
    arc->record.count   = 1;
    return index;
}

void
__profile_mcount(CYG_ADDRWORD caller_pc, CYG_ADDRWORD callee_pc)
{
    int                 hash_index;
    int                 slot;
    struct profile_arc* current;

    // mcount() may be called at any time, even before profile_arc_records
    // is enabled. There is an assumption here that .bss has been zeroed
    // before the first call into C code, i.e. by the initial assembler
    // startup code.
    if (! profile_enabled) {
        if (! profile_reset_pending) {
            return;
        }
        profile_reset_pending   = 0;
        profile_reset();
        profile_enabled         = 1;
    }

    // profile_on() leaves these tables unallocated when it reports
    // "call graph profiling disabled"; there is nothing to record then.
    if (((int*)0 == profile_arc_hashtable) ||
        ((struct profile_arc*)0 == profile_arc_records)) {
        return;
    }

    // Check the caller_pc because that is what is used to index the
    // hash table. Checking the callee_pc is optional and depends on
    // exactly how you interpret the start and end addresses passed to
    // profile_on().
    if ((caller_pc < (CYG_ADDRWORD)profile_hist_hdr.low_pc) ||
        (caller_pc > (CYG_ADDRWORD)profile_hist_hdr.high_pc)) {
        return;
    }

    hash_index = (int) ((caller_pc - (CYG_ADDRWORD)profile_hist_hdr.low_pc) >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
    slot       = profile_arc_hashtable[hash_index];
    if (0 == slot) {
        // First arc for this hash slot. On overflow the helper returns 0,
        // which leaves the slot marked as empty.
        profile_arc_hashtable[hash_index] = profile_arc_alloc(caller_pc, callee_pc);
        return;
    }

    // Walk the chain looking for an existing record of this exact arc,
    // appending a new record at the tail if there is none.
    for (;;) {
        current = &(profile_arc_records[slot]);
        if ((current->record.from_pc == (void*) caller_pc) && (current->record.self_pc == (void*) callee_pc)) {
            current->record.count++;
            return;
        }
        if (0 == current->next) {
            current->next = profile_arc_alloc(caller_pc, callee_pc);
            return;
        }
        slot = current->next;
    }
}
#endif

279 #ifdef CYGPKG_PROFILE_TFTP
280 // ----------------------------------------------------------------------------
283 // To keep things simple this code only supports one open file at a time,
284 // and only gmon.out is supported.
286 static int profile_tftp_next_index = 0;
287 static unsigned char* profile_tftp_current_block = (unsigned char*) 0;
288 static int profile_tftp_current_len = 0;
289 static int profile_tftp_is_open = 0;
292 profile_tftp_open(const char *filename, int flags)
294 // Only allow one open file for now.
295 if (profile_tftp_is_open) {
298 // Only read-only access is supported.
299 if ((0 != (flags & ~O_RDONLY)) || (0 == (flags & O_RDONLY))) {
302 // Only gmon.out can be retrieved using this tftp daemon
303 if (0 != strcmp(filename, "gmon.out")) {
306 // Everything is in order. Prepare for the first read. Profiling
307 // is suspended while the tftp transfer is in progress to avoid
308 // inconsistent results.
310 profile_tftp_is_open = 1;
311 profile_tftp_next_index = 0;
312 profile_tftp_current_len = 0;
314 // Report any callgraph overflows. This is best done when retrieving
315 // the results, either in the gdb script or at tftp open time.
316 #ifdef CYGPKG_PROFILE_CALLGRAPH
317 if (profile_arc_overflow) {
318 diag_printf("Profiling: warning, the table of callgraph arcs has overflowed\n");
319 diag_printf("This can be avoided by increasing CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE\n");
327 profile_tftp_close(int fd)
329 if (! profile_tftp_is_open) {
332 profile_tftp_is_open = 0;
334 // The histogram counters are only 16 bits, so can easily overflow
335 // during a long run. Resetting the counters here makes it possible
336 // to examine profile data during different parts of the run with
337 // a reduced risk of overflow.
340 // Profiling was disabled in the open() call
// gmon.out can only be read, not written.
//
// Write callback for the TFTP daemon: every write request is refused
// with -1, whatever the arguments.
static int
profile_tftp_write(int fd, const void *buf, int len)
{
    (void) fd;
    (void) buf;
    (void) len;
    return -1;
}

352 // The data that should go into gmon.out is spread all over memory.
353 // This utility is used to move from one block to the next.
355 profile_tftp_read_next(void)
357 switch (profile_tftp_next_index) {
358 case 0 : // The current block is the gmon hdr
359 profile_tftp_current_block = (unsigned char*) &profile_gmon_hdr;
360 profile_tftp_current_len = sizeof(struct gmon_hdr);
362 case 1 : // The histogram tag
363 profile_tftp_current_block = &(profile_tags[0]);
364 profile_tftp_current_len = 1;
366 case 2 : // The histogram header
367 profile_tftp_current_block = (unsigned char*) &profile_hist_hdr;
368 profile_tftp_current_len = sizeof(struct gmon_hist_hdr);
370 case 3 : // The histogram data
371 profile_tftp_current_block = (unsigned char*) profile_hist_data;
372 profile_tftp_current_len = profile_hist_hdr.hist_size * sizeof(cyg_uint16);
374 default : // One of the arc records. These start at array offset 1.
376 int arc_index = profile_tftp_next_index - 3;
377 if (arc_index >= profile_arc_next) {
378 profile_tftp_current_block = (unsigned char*) 0;
379 profile_tftp_current_len = 0;
381 // gmon.out should contain a 1 byte tag followed by each
383 profile_tftp_current_block = (unsigned char*) &(profile_arc_records[arc_index].tags[3]);
384 profile_tftp_current_len = sizeof(struct gmon_cg_arc_record) + 1;
389 profile_tftp_next_index++;
392 // Read the next block of data. There is no seek operation so no need
393 // to worry about the current position. State from the previous reads
394 // is held in profile_tftp_current_block and profile_tftp_current_len
396 profile_tftp_read(int fd, void *buf_arg, int len)
398 unsigned char* buf = (unsigned char*) buf_arg;
401 if ( ! profile_tftp_is_open ) {
406 if (0 == profile_tftp_current_len) {
407 profile_tftp_read_next();
408 if (0 == profile_tftp_current_len) {
412 if (profile_tftp_current_len >= len) {
413 // The request can be satisfied by the current block
414 memcpy(&(buf[read]), profile_tftp_current_block, len);
415 profile_tftp_current_block += len;
416 profile_tftp_current_len -= len;
420 memcpy(&(buf[read]), profile_tftp_current_block, profile_tftp_current_len);
421 len -= profile_tftp_current_len;
422 read += profile_tftp_current_len;
423 profile_tftp_current_len = 0;
429 static struct tftpd_fileops profile_tftp_fileops = {
437 // ----------------------------------------------------------------------------
442 // suspend currently running profiling
444 // Clear all pre-existing profile data
446 if (profile_hist_data) {
447 free(profile_hist_data);
448 profile_hist_data = NULL;
450 #ifdef CYGPKG_PROFILE_CALLGRAPH
451 if (profile_arc_hashtable) {
452 free(profile_arc_hashtable);
453 profile_arc_hashtable=NULL;
455 if (profile_arc_records) {
456 free(profile_arc_records);
457 profile_arc_records=NULL;
463 // ----------------------------------------------------------------------------
464 // profile_on() has to be called by application code to start profiling.
465 // Application code will determine the start and end addresses, usually
466 // _stext and _etext, but it is possible to limit profiling to only
467 // some of the code. The bucket size controls how many PC addresses
468 // will be treated as a single hit: a smaller bucket increases precision
469 // but requires more memory. The resolution is used to initialize the
470 // profiling timer: more frequent interrupts means more accurate results
471 // but increases the risk of an overflow.
473 // profile_on() can be invoked multiple times. If invoked a second time
474 // it will stop the current profiling run and create a new profiling
480 profile_on(void *_start, void *_end, int _bucket_size, int resolution)
483 cyg_uint32 version = GMON_VERSION;
484 CYG_ADDRWORD text_size = (CYG_ADDRWORD)_end - (CYG_ADDRWORD)_start;
488 // invoking profile_on a second time
493 // Initialize statics. This also ensures that they won't be
494 // garbage collected by the linker so a gdb script can safely
496 memcpy(profile_gmon_hdr.cookie, GMON_MAGIC, 4);
497 memcpy(profile_gmon_hdr.version, &version, 4);
498 profile_tags[0] = GMON_TAG_TIME_HIST;
499 profile_tags[1] = GMON_TAG_CG_ARC;
500 profile_tags[2] = GMON_TAG_BB_COUNT;
501 strcpy(profile_hist_hdr.dimen, "seconds");
502 profile_hist_hdr.dimen_abbrev = 's';
504 // The actual bucket size. For efficiency this should be a power of 2.
507 while (bucket_size < _bucket_size) {
512 // The gprof documentation claims that this should be the size in
513 // bytes. The implementation treats it as a count.
514 profile_hist_hdr.hist_size = (cyg_uint32) ((text_size + bucket_size - 1) / bucket_size);
515 profile_hist_hdr.low_pc = _start;
516 profile_hist_hdr.high_pc = (void*)((cyg_uint8*)_end - 1);
517 // The prof_rate is the frequency in hz. The resolution argument is
518 // an interval in microseconds.
519 profile_hist_hdr.prof_rate = 1000000 / resolution;
521 // Now allocate a buffer for the histogram data.
522 profile_hist_data = (cyg_uint16*) malloc(profile_hist_hdr.hist_size * sizeof(cyg_uint16));
523 if ((cyg_uint16*)0 == profile_hist_data) {
524 diag_printf("profile_on(): cannot allocate histogram buffer - ignored\n");
527 memset(profile_hist_data, 0, profile_hist_hdr.hist_size * sizeof(cyg_uint16));
529 #ifdef CYGPKG_PROFILE_CALLGRAPH
530 // Two arrays are needed for keeping track of the callgraph. The
531 // first is a hash table. The second holds the arc data. The
532 // latter array contains an extra 50 slots to cope with degenerate
533 // programs (including testcases).
537 profile_arc_hash_count = (int) ((text_size + (0x01 << CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT) - 1)
538 >> CYGNUM_PROFILE_CALLGRAPH_HASH_SHIFT);
539 profile_arc_records_count = (int)
540 (CYGNUM_PROFILE_CALLGRAPH_ARC_PERCENTAGE * (text_size / 100)) /
541 sizeof(struct profile_arc)
544 profile_arc_hashtable = (int*) malloc(profile_arc_hash_count * sizeof(int));
545 if ((int*)0 == profile_arc_hashtable) {
546 diag_printf("profile_on(): cannot allocate call graph hash table\n call graph profiling disabled\n");
548 memset(profile_arc_hashtable, 0, profile_arc_hash_count * sizeof(int));
549 profile_arc_records = (struct profile_arc*) malloc(profile_arc_records_count * sizeof(struct profile_arc));
550 if ((struct profile_arc*)0 == profile_arc_records) {
551 diag_printf("profile_on(): cannot allocate call graph arc table\n call graph profiling disabled\n");
552 free(profile_arc_hashtable);
553 profile_arc_hashtable = (int*) 0;
555 memset(profile_arc_records, 0, profile_arc_records_count * sizeof(struct profile_arc));
556 for (i = 0; i < profile_arc_records_count; i++) {
557 profile_arc_records[i].tags[3] = GMON_TAG_CG_ARC;
559 profile_arc_next = 1; // slot 0 cannot be used because 0 marks an unused hash slot.
564 profile_arc_records = (struct profile_arc*) 0;
567 diag_printf("Profile from %p..%p in %d buckets of size %d\n",
568 profile_hist_hdr.low_pc, profile_hist_hdr.high_pc,
569 profile_hist_hdr.hist_size, bucket_size);
571 // Activate the profiling timer, which is usually provided by the
572 // variant or target HAL. The requested resolution may not be
573 // possible on the current hardware, so the HAL is allowed to
575 resolution = hal_enable_profile_timer(resolution);
576 profile_hist_hdr.prof_rate = 1000000 / resolution;
580 #ifdef CYGPKG_PROFILE_TFTP
581 // Create a TFTP server to provide the data
582 // invoking this a second time is harmless
583 (void) tftpd_start(CYGNUM_PROFILE_TFTP_PORT, &profile_tftp_fileops);