kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 51d3beb1b6239099fe5d1500fdaecaa2971e76ff
parent b930f6ac494f8af3d271859633d313b6729238b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 14:01:18 -0700

asm: implement .comm/.lcomm and .uleb128/.sleb128 (were silently skipped)

The shared assembler driver lumped .comm/.lcomm/.uleb128/.sleb128 into the
accepted-but-ignored skip-to-EOL bucket. That was silent miscompilation, not a
missing-feature error: .comm produced no symbol and reserved no space (a
program relying on '.comm buf,1024' got a missing/zero-size symbol), and
.uleb128/.sleb128 emitted zero bytes, shifting every following byte in
hand-written DWARF/exception tables.

Implement them:
  - .comm/.lcomm NAME, SIZE[, ALIGN] mint an SK_COMMON symbol (global for
    .comm, local for .lcomm) with the given size and alignment; the linker
    allocates .bss space.
  - .uleb128/.sleb128 VALUE[, ...] emit proper LEB128 byte sequences.

Verified: .comm gbuf -> global COMMON (align 8), .lcomm lbuf -> local COMMON;
LEB128 output matches llvm-mc byte-for-byte (00 7f 80 01 e5 8e 26 7e ff 7e 3f).
Adds an aa64 LEB128 encode corpus case to the default suite.

Diffstat:
M src/asm/asm.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
A test/asm/encode/aa64_data_leb.expected.hex | 1 +
A test/asm/encode/aa64_data_leb.s | 9 +++++++++
A test/asm/encode/aa64_data_leb.targets | 2 ++
4 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/src/asm/asm.c b/src/asm/asm.c @@ -897,15 +897,73 @@ static void do_directive(AsmDriver* d, Sym name) { return; } + /* .comm/.lcomm NAME, SIZE[, ALIGN] — declare a common symbol. Previously + * skipped, which silently produced no symbol and reserved no space. Model + * both as SK_COMMON (the linker allocates .bss space); .comm is global, + * .lcomm local. */ + if (sym_eq(d, name, "comm") || sym_eq(d, name, "lcomm")) { + int is_local = sym_eq(d, name, "lcomm"); + Sym nm = expect_ident(d, ".comm"); + i64 size = 0, align = 1; + if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .comm: expected ','"); + size = asm_driver_parse_const(d); + if (asm_driver_eat_comma(d)) align = asm_driver_parse_const(d); + if (size < 0) size = 0; + if (align < 1) align = 1; + { + ObjSym* s = sym_mut(d, intern_sym(d, nm)); + s->kind = (u16)SK_COMMON; + s->bind = (u16)(is_local ? SB_LOCAL : SB_GLOBAL); + s->size = (u64)size; + s->common_align = (u64)align; + } + d_skip_to_eol(d); + return; + } + /* .uleb128/.sleb128 VALUE[, VALUE...] — emit LEB128-encoded bytes. + * Previously skipped, which emitted nothing and corrupted any hand-written + * DWARF / exception tables that follow. */ + if (sym_eq(d, name, "uleb128") || sym_eq(d, name, "sleb128")) { + int sgn = sym_eq(d, name, "sleb128"); + (void)asm_driver_cur_section(d); + for (;;) { + i64 v = asm_driver_parse_const(d); + u8 buf[16]; + u32 n = 0; + if (sgn) { + int more = 1; + while (more) { + u8 b = (u8)((u64)v & 0x7fu); + v >>= 7; /* arithmetic right shift keeps the sign */ + if ((v == 0 && !(b & 0x40u)) || (v == -1 && (b & 0x40u))) + more = 0; + else + b |= 0x80u; + buf[n++] = b; + } + } else { + u64 uv = (u64)v; + do { + u8 b = (u8)(uv & 0x7fu); + uv >>= 7; + if (uv) b |= 0x80u; + buf[n++] = b; + } while (uv); + } + d->mc->emit_bytes(d->mc, buf, n); + if (!asm_driver_eat_comma(d)) break; + } + d_skip_to_eol(d); + return; + } + /* CFI block + accepted-but-ignored directives. 
Keep parser * forward-progress without aborting the whole TU. */ if (starts_with(d, name, "cfi_") || sym_eq(d, name, "file") || sym_eq(d, name, "loc") || sym_eq(d, name, "ident") || sym_eq(d, name, "popsection") || sym_eq(d, name, "pushsection") || sym_eq(d, name, "previous") || - sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "comm") || - sym_eq(d, name, "lcomm") || sym_eq(d, name, "uleb128") || - sym_eq(d, name, "sleb128") || sym_eq(d, name, "macro") || + sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "macro") || sym_eq(d, name, "endm") || sym_eq(d, name, "if") || sym_eq(d, name, "endif") || sym_eq(d, name, "else") || sym_eq(d, name, "include")) { diff --git a/test/asm/encode/aa64_data_leb.expected.hex b/test/asm/encode/aa64_data_leb.expected.hex @@ -0,0 +1 @@ +007f8001e58e267eff7e3faa diff --git a/test/asm/encode/aa64_data_leb.s b/test/asm/encode/aa64_data_leb.s @@ -0,0 +1,9 @@ +.text + .uleb128 0 + .uleb128 127 + .uleb128 128 + .uleb128 624485 + .sleb128 -2 + .sleb128 -129 + .sleb128 63 + .byte 0xaa diff --git a/test/asm/encode/aa64_data_leb.targets b/test/asm/encode/aa64_data_leb.targets @@ -0,0 +1 @@ +aa64 +\ No newline at end of file