kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

symbolize.c (6327B)


      1 #include <kit/core.h>
      2 #include <stddef.h>
      3 #include <stdint.h>
      4 #include <string.h>
      5 
      6 #include "driver.h"
      7 #include "dwarfsym.h"
      8 
      9 /* kit symbolize — annotate a __kit_print_backtrace stream in place.
     10  *
     11  * Where `addr2line` is a faithful clone of the GNU/LLVM "addresses in,
     12  * file:line out" contract, `symbolize` matches kit's actual backtrace
     13  * artifact: it reads the raw "#N 0x<hex>" lines that __kit_print_backtrace
     14  * writes (rt/lib/stack/print_backtrace.c), finds the address on each line,
     15  * resolves it through the same DWARF reader (kit_dwarf_func_at +
     16  * kit_dwarf_addr_to_line, via driver/lib/dwarfsym), and rewrites the line as
     17  *
     18  *     #0 0x401136 bt_leaf at addr2line_prog.c:51:3
     19  *
     20  * preserving the original "#N" framing that addr2line structurally can't keep.
     21  * Lines with no recognizable 0x<hex> token pass through verbatim, so a mixed
     22  * log (banner + frames) survives unharmed. See doc/plan/BACKTRACE.md (WS5). */
     23 
     /* Tool name handed to driver_errf and driver_dwarfsym_open as the tag for
      * this subcommand. */
     #define SYM_TOOL "symbolize"

     /* Command-line settings gathered by driver_symbolize. The caller zeroes the
      * whole struct before parsing, so an option that never appears stays 0. */
     struct SymOpts {
       /* Argument of -e: path of the object file to open; unset until -e is
        * parsed. */
       const char* exe_path;
       /* Nonzero once --basenames is given: print file names without their
        * directory part. */
       int basenames;
     };
     typedef struct SymOpts SymOpts;
     30 
     /* Decode one ASCII hex digit. Returns its value (0..15), or -1 when `c` is
      * not a hex digit. */
     static int sym_hex_digit(char c) {
       if (c >= '0' && c <= '9') return c - '0';
       if (c >= 'a' && c <= 'f') return c - 'a' + 10;
       if (c >= 'A' && c <= 'F') return c - 'A' + 10;
       return -1;
     }

     /* Nonzero when `c` is an ASCII letter, digit or underscore, i.e. a character
      * that would make a following "0x" the tail of a longer word. */
     static int sym_is_word_char(char c) {
       return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') ||
              (c >= 'A' && c <= 'Z') || c == '_';
     }

     /* Find the first standalone "0x"/"0X"-prefixed hex token in [s, s+len) and
      * decode it.
      *
      * Returns 1 and stores the value in *out when such a token is found, else 0
      * (and *out is left untouched). A candidate is skipped, and scanning goes on
      * to the next one, when
      *   - no hex digit follows the prefix;
      *   - the prefix is glued to a preceding letter, digit or underscore, so
      *     text such as "800x600" in a banner line is not taken for an address;
      *   - the digits do not fit in 64 bits (leading zeros are fine), because a
      *     wrapped value would resolve to an unrelated address.
      *
      * Works on a non-NUL-terminated span (the line is a slice of the stdin
      * buffer), so it never reads past `len`. */
     static int sym_line_addr(const char* s, size_t len, uint64_t* out) {
       size_t i;
       for (i = 0; i + 1 < len; ++i) {
         uint64_t v = 0;
         size_t j;
         int any = 0;
         int overflow = 0;

         if (s[i] != '0' || (s[i + 1] != 'x' && s[i + 1] != 'X')) continue;
         if (i > 0 && sym_is_word_char(s[i - 1])) continue;

         for (j = i + 2; j < len; ++j) {
           int d = sym_hex_digit(s[j]);
           if (d < 0) break;
           /* The top nibble is about to be shifted out: the value needs more
            * than 64 bits. Keep consuming digits so the whole run is skipped. */
           if (v >> 60) overflow = 1;
           v = (v << 4) | (uint64_t)d;
           any = 1;
         }
         if (any && !overflow) {
           *out = v;
           return 1;
         }
       }
       return 0;
     }
     64 
     65 /* Print " <func> at <file>:<line>[:<col>]" for a resolved address, using the
     66  * "??" / "??:0" placeholders addr2line's pretty mode uses for the missing
     67  * halves. Mirrors a2l_translate's pretty path so the two tools render an
     68  * unresolved frame identically. */
     69 static void sym_emit_annotation(const DriverSymLoc* loc, const SymOpts* opts) {
     70   driver_printf(" ");
     71   if (loc->have_func)
     72     driver_printf("%.*s at ", (int)loc->func.len, loc->func.s);
     73   else
     74     driver_printf("?? at ");
     75 
     76   if (loc->have_line) {
     77     const char* f = loc->file.s;
     78     if (opts->basenames) f = driver_basename(f);
     79     driver_printf("%s:%u", f, loc->line);
     80     if (loc->col) driver_printf(":%u", loc->col);
     81   } else {
     82     driver_printf("??:0");
     83   }
     84 }
     85 
     /* Print the help text for `kit symbolize` through driver_printf. The text is
      * one string literal, written below with one source line per output line. */
     void driver_help_symbolize(void) {
       driver_printf(
           "%.*s",
           KIT_SLICE_ARG(KIT_SLICE_LIT(
               "kit symbolize — annotate a kit backtrace stream with func at file:line\n"
               "\n"
               "USAGE\n"
               "  <prog that prints a backtrace> | kit symbolize -e FILE\n"
               "  kit symbolize -e FILE < backtrace.txt\n"
               "\n"
               "OPTIONS\n"
               "  -e FILE              object file with debug info (required)\n"
               "  --basenames          strip directory from file paths\n"
               "  -h, --help           show this help\n"
               "\n"
               "Reads the raw \"#N 0x<hex>\" lines that __kit_print_backtrace writes\n"
               "on standard input, resolves the address on each line via the image's\n"
               "debug info, and rewrites the line as\n"
               "  #0 0x401136 bt_leaf at file.c:51:3\n"
               "keeping the original \"#N\" framing. Lines with no 0x<hex> address pass\n"
               "through unchanged. For a static non-PIE image the runtime addresses\n"
               "equal the link-time addresses, so no load-bias adjustment is needed.\n")));
     }
    115 
    116 int driver_symbolize(int argc, char** argv) {
    117   DriverEnv env;
    118   SymOpts opts;
    119   DriverDwarfSym sym;
    120   uint8_t* data = NULL;
    121   size_t size = 0;
    122   int i, rc = 1, opened = 0;
    123   size_t pos;
    124 
    125   if (argc < 2 || driver_argv_wants_help(argc, argv, 1)) {
    126     driver_help_symbolize();
    127     return 0;
    128   }
    129 
    130   memset(&opts, 0, sizeof opts);
    131   driver_env_init(&env);
    132 
    133   for (i = 1; i < argc; ++i) {
    134     const char* a = argv[i];
    135     if (driver_streq(a, "-e")) {
    136       if (i + 1 >= argc) {
    137         driver_errf(SYM_TOOL, "-e requires a path");
    138         rc = 2;
    139         goto done;
    140       }
    141       opts.exe_path = argv[++i];
    142       continue;
    143     }
    144     if (driver_streq(a, "--basenames")) {
    145       opts.basenames = 1;
    146       continue;
    147     }
    148     if (a[0] == '-' && a[1] != '\0') {
    149       driver_errf(SYM_TOOL, "unknown option: %s", a);
    150       rc = 2;
    151       goto done;
    152     }
    153     driver_errf(SYM_TOOL, "unexpected argument: %s", a);
    154     rc = 2;
    155     goto done;
    156   }
    157 
    158   if (!opts.exe_path) {
    159     driver_errf(SYM_TOOL, "no object file specified (-e FILE)");
    160     rc = 2;
    161     goto done;
    162   }
    163 
    164   /* open() memsets `sym` before any fallible step, so once we are past this
    165    * point driver_dwarfsym_close is always safe — even on a partial failure. */
    166   opened = 1;
    167   if (driver_dwarfsym_open(&sym, &env, SYM_TOOL, opts.exe_path) != 0) goto done;
    168 
    169   if (!driver_read_stdin(&env, &data, &size)) {
    170     driver_errf(SYM_TOOL, "failed to read backtrace stream from stdin");
    171     goto done;
    172   }
    173 
    174   /* Emit each line verbatim, appending an annotation when it carries an
    175    * address. Split on '\n'; a final line without a trailing newline is still
    176    * processed (and printed without one). */
    177   pos = 0;
    178   while (pos < size) {
    179     size_t start = pos;
    180     size_t end = start;
    181     uint64_t addr = 0;
    182     while (end < size && data[end] != '\n') ++end;
    183 
    184     driver_printf("%.*s", (int)(end - start), (const char*)(data + start));
    185     if (sym_line_addr((const char*)(data + start), end - start, &addr)) {
    186       DriverSymLoc loc;
    187       driver_dwarfsym_lookup(&sym, addr, 1, &loc);
    188       sym_emit_annotation(&loc, &opts);
    189     }
    190     if (end < size) {
    191       driver_printf("\n");
    192       pos = end + 1;
    193     } else {
    194       pos = end; /* last line had no terminator */
    195     }
    196   }
    197 
    198   rc = 0;
    199 
    200 done:
    201   if (data) driver_free(&env, data, size);
    202   if (opened) driver_dwarfsym_close(&sym);
    203   driver_env_fini(&env);
    204   return rc;
    205 }