commit 51d3beb1b6239099fe5d1500fdaecaa2971e76ff
parent b930f6ac494f8af3d271859633d313b6729238b6
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 14:01:18 -0700
asm: implement .comm/.lcomm and .uleb128/.sleb128 (were silently skipped)
The shared assembler driver lumped .comm/.lcomm/.uleb128/.sleb128 into the
accepted-but-ignored skip-to-EOL bucket. That was silent miscompilation, not a
missing-feature error: .comm produced no symbol and reserved no space (a
program relying on '.comm buf,1024' got a missing/zero-size symbol), and
.uleb128/.sleb128 emitted zero bytes, shifting every following byte in
hand-written DWARF/exception tables.
Implement them:
- .comm/.lcomm NAME, SIZE[, ALIGN] mint an SK_COMMON symbol (.comm is global,
.lcomm is local) with the given size and alignment; the linker allocates .bss
space.
- .uleb128/.sleb128 VALUE[, ...] emit proper LEB128 byte sequences.
Verified: .comm gbuf -> global COMMON (align 8), .lcomm lbuf -> local COMMON;
LEB128 output matches llvm-mc byte-for-byte (00 7f 80 01 e5 8e 26 7e ff 7e 3f).
Adds an aa64 LEB128 encode corpus case to the default suite.
Diffstat:
4 files changed, 73 insertions(+), 3 deletions(-)
diff --git a/src/asm/asm.c b/src/asm/asm.c
@@ -897,15 +897,73 @@ static void do_directive(AsmDriver* d, Sym name) {
return;
}
+ /* .comm/.lcomm NAME, SIZE[, ALIGN] — declare a common symbol. Previously
+ * skipped, which silently produced no symbol and reserved no space. Model
+ * both as SK_COMMON (the linker allocates .bss space); .comm is global,
+ * .lcomm local. */
+ if (sym_eq(d, name, "comm") || sym_eq(d, name, "lcomm")) {
+ int is_local = sym_eq(d, name, "lcomm");
+ Sym nm = expect_ident(d, ".comm");
+ i64 size = 0, align = 1;
+ if (!asm_driver_eat_comma(d)) d_panicf(d, "asm: .comm: expected ','");
+ size = asm_driver_parse_const(d);
+ if (asm_driver_eat_comma(d)) align = asm_driver_parse_const(d);
+ if (size < 0) size = 0;
+ if (align < 1) align = 1;
+ {
+ ObjSym* s = sym_mut(d, intern_sym(d, nm));
+ s->kind = (u16)SK_COMMON;
+ s->bind = (u16)(is_local ? SB_LOCAL : SB_GLOBAL);
+ s->size = (u64)size;
+ s->common_align = (u64)align;
+ }
+ d_skip_to_eol(d);
+ return;
+ }
+ /* .uleb128/.sleb128 VALUE[, VALUE...] — emit LEB128-encoded bytes.
+ * Previously skipped, which emitted nothing and corrupted any hand-written
+ * DWARF / exception tables that follow. */
+ if (sym_eq(d, name, "uleb128") || sym_eq(d, name, "sleb128")) {
+ int sgn = sym_eq(d, name, "sleb128");
+ (void)asm_driver_cur_section(d);
+ for (;;) {
+ i64 v = asm_driver_parse_const(d);
+ u8 buf[16];
+ u32 n = 0;
+ if (sgn) {
+ int more = 1;
+ while (more) {
+ u8 b = (u8)((u64)v & 0x7fu);
+ v >>= 7; /* arithmetic right shift keeps the sign */
+ if ((v == 0 && !(b & 0x40u)) || (v == -1 && (b & 0x40u)))
+ more = 0;
+ else
+ b |= 0x80u;
+ buf[n++] = b;
+ }
+ } else {
+ u64 uv = (u64)v;
+ do {
+ u8 b = (u8)(uv & 0x7fu);
+ uv >>= 7;
+ if (uv) b |= 0x80u;
+ buf[n++] = b;
+ } while (uv);
+ }
+ d->mc->emit_bytes(d->mc, buf, n);
+ if (!asm_driver_eat_comma(d)) break;
+ }
+ d_skip_to_eol(d);
+ return;
+ }
+
/* CFI block + accepted-but-ignored directives. Keep parser
* forward-progress without aborting the whole TU. */
if (starts_with(d, name, "cfi_") || sym_eq(d, name, "file") ||
sym_eq(d, name, "loc") || sym_eq(d, name, "ident") ||
sym_eq(d, name, "popsection") || sym_eq(d, name, "pushsection") ||
sym_eq(d, name, "previous") ||
- sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "comm") ||
- sym_eq(d, name, "lcomm") || sym_eq(d, name, "uleb128") ||
- sym_eq(d, name, "sleb128") || sym_eq(d, name, "macro") ||
+ sym_eq(d, name, "subsections_via_symbols") || sym_eq(d, name, "macro") ||
sym_eq(d, name, "endm") || sym_eq(d, name, "if") ||
sym_eq(d, name, "endif") || sym_eq(d, name, "else") ||
sym_eq(d, name, "include")) {
diff --git a/test/asm/encode/aa64_data_leb.expected.hex b/test/asm/encode/aa64_data_leb.expected.hex
@@ -0,0 +1 @@
+007f8001e58e267eff7e3faa
diff --git a/test/asm/encode/aa64_data_leb.s b/test/asm/encode/aa64_data_leb.s
@@ -0,0 +1,9 @@
+.text
+ .uleb128 0
+ .uleb128 127
+ .uleb128 128
+ .uleb128 624485
+ .sleb128 -2
+ .sleb128 -129
+ .sleb128 63
+ .byte 0xaa
diff --git a/test/asm/encode/aa64_data_leb.targets b/test/asm/encode/aa64_data_leb.targets
@@ -0,0 +1 @@
+aa64
+\ No newline at end of file