kit

kit
git clone https://git.ryansepassi.com/git/kit.git
Log | Files | Refs | README

commit 5b8c7a74d0fd81536a83f27676a6738bfd7a1be0
parent 9fd6b74d8e1db3d5afe450867e52e4ea6544456d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date:   Fri, 29 May 2026 15:39:10 -0700

x64 asm: parse sym(%rip), @PLT, @GOTPCREL relocation-operator operands

Adds GNU-as relocation-operator syntax to the x86-64 standalone assembler:
symbolic RIP-relative memory operands sym(%rip) (R_X86_64_PC32) and
sym@GOTPCREL(%rip) (R_X86_64_REX_GOTPCRELX), and an @PLT suffix on call/jmp
targets (R_X86_64_PLT32). The disp32 relocation is emitted at offset
mc->pos - 4 - trailing with addend off - 4 - trailing, where trailing is the
number of immediate bytes that follow the disp32, so PC32 yields the
rip-relative displacement to the end of the instruction (e.g.
movl $imm,sym(%rip) has trailing = 4 → addend -8). Corpus x64_reloc_modifiers is
byte- and reloc-identical to llvm-mc including addends (verified via
llvm-objdump -r).

Diffstat:
Msrc/arch/x64/asm.c | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------------------
Atest/asm/encode/x64_reloc_modifiers.expected.hex | 1+
Atest/asm/encode/x64_reloc_modifiers.s | 10++++++++++
Atest/asm/encode/x64_reloc_modifiers.targets | 2++
4 files changed, 93 insertions(+), 26 deletions(-)

diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c @@ -44,9 +44,13 @@ typedef struct X64AsmOperand { u8 scale; /* MEM SIB: log2 of scale ∈ {0,1,2,3} → 1/2/4/8 */ u8 has_index; /* MEM: SIB index present */ u8 rip_relative; /* MEM: bare (%rip)/disp(%rip) form */ - u8 pad[1]; + u8 has_reloc; /* MEM: symbolic disp carries a relocation */ + u8 pad[3]; i64 imm; i32 disp; + RelocKind reloc_kind; /* MEM: reloc on the disp32 (PC32 / REX_GOTPCRELX) */ + ObjSymId reloc_sym; /* MEM: relocated symbol */ + i64 reloc_off; /* MEM: user addend on the symbol */ } X64AsmOperand; static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, u32* width_out, @@ -213,6 +217,21 @@ static void parse_mem_paren_body(AsmDriver* d, X64AsmOperand* op) { asm_driver_panic(d, "x64 asm: expected register in memory operand"); } +/* Consume an optional `@MOD` relocation suffix after a symbol and return the + * RelocKind it selects, or `dflt` when no suffix is present. */ +static RelocKind x64_parse_reloc_suffix(AsmDriver* d, RelocKind dflt) { + if (!asm_driver_tok_is_punct(asm_driver_peek(d), '@')) return dflt; + (void)asm_driver_next(d); /* '@' */ + AsmTok n = asm_driver_next(d); + if (n.kind != ASM_TOK_IDENT) + asm_driver_panic(d, "x64 asm: expected relocation name after '@'"); + Slice s = pool_slice(asm_driver_pool(d), n.v.ident); + if (slice_eq_cstr(s, "PLT")) return R_X64_PLT32; + if (slice_eq_cstr(s, "GOTPCREL")) return R_X64_REX_GOTPCRELX; + if (slice_eq_cstr(s, "GOTPCRELX")) return R_X64_GOTPCRELX; + asm_driver_panic(d, "x64 asm: unsupported relocation suffix"); +} + static X64AsmOperand parse_operand(AsmDriver* d) { X64AsmOperand op; AsmTok t; @@ -269,13 +288,36 @@ static X64AsmOperand parse_operand(AsmDriver* d) { op.kind = X64_ASM_OP_MEM; op.disp = 0; if (!asm_driver_tok_is_punct(t, '(')) { - op.disp = (i32)asm_driver_parse_const(d); + /* A symbolic displacement (`sym(%rip)`, `sym@GOTPCREL(%rip)`) becomes a + * relocation; a numeric displacement stays literal. 
*/ + if (asm_driver_peek(d).kind == ASM_TOK_IDENT) { + asm_driver_parse_sym_expr(d, &op.reloc_sym, &op.reloc_off); + op.reloc_kind = x64_parse_reloc_suffix(d, R_PC32); + op.has_reloc = 1; + } else { + op.disp = (i32)asm_driver_parse_const(d); + } } asm_driver_expect_punct(d, '(', "'(' in x64 memory operand"); parse_mem_paren_body(d, &op); + if (op.has_reloc && !op.rip_relative) + asm_driver_panic(d, "x64 asm: symbolic memory displacement requires (%rip)"); return op; } +/* Emit the relocation a symbolic `(%rip)` memory operand carries, if any. The + * disp32 field is the last 4 bytes of the instruction except for an immediate + * store, where `trailing` immediate bytes follow it. R_X86_64_PC32-style + * relocs use addend (off - 4 - trailing) so S+A-P yields the rip-relative + * displacement to the end of the instruction. */ +static void x64_emit_mem_reloc(AsmDriver* d, MCEmitter* mc, + const X64AsmOperand* m, u32 trailing) { + if (!m->has_reloc) return; + u32 disp_pos = mc->pos(mc) - 4u - trailing; + mc->emit_reloc_at(mc, asm_driver_cur_section(d), disp_pos, m->reloc_kind, + m->reloc_sym, m->reloc_off - 4 - (i64)trailing, 1, 0); +} + static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg, X64AsmOperand mem) { /* RIP-relative carries no base/index registers (rm=101, no SIB). */ @@ -309,8 +351,8 @@ static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) { /* reg ← mem with an explicit single-byte opcode (e.g. 0x8B MOV, 0x8D LEA). * Routes the full memory-operand variety (plain / SIB / RIP / segment) * through the shared pack helpers. 
*/ -static void emit_reg_mem_operand(MCEmitter* mc, u32 size, u8 opc, u32 dst, - X64AsmOperand src) { +static void emit_reg_mem_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc, + u32 dst, X64AsmOperand src) { u8 buf[16]; u32 n = 0; if (size == 2u) buf[n++] = X64_OPSIZE_PFX; @@ -319,19 +361,20 @@ static void emit_reg_mem_operand(MCEmitter* mc, u32 size, u8 opc, u32 dst, buf[n++] = opc; n += x64_pack_mem_operand(buf + n, dst, src); mc->emit_bytes(mc, buf, n); + x64_emit_mem_reloc(d, mc, &src, 0); } -static void emit_mov_load_operand(MCEmitter* mc, u32 size, u32 dst, +static void emit_mov_load_operand(AsmDriver* d, MCEmitter* mc, u32 size, u32 dst, X64AsmOperand src) { - emit_reg_mem_operand(mc, size, X64_OPC_MOV_R_RM, dst, src); + emit_reg_mem_operand(d, mc, size, X64_OPC_MOV_R_RM, dst, src); } /* reg → mem store with an explicit reg-to-r/m opcode. Used by MOV * (0x89/0x88) and the ALU /r stores (ADD 0x01, OR 0x09, AND 0x21, * SUB 0x29, XOR 0x31, CMP 0x39). The register operand occupies the * ModR/M reg field; the memory operand the r/m field. */ -static void emit_reg_store_operand(MCEmitter* mc, u32 size, u8 opc, u32 src, - X64AsmOperand dst, int force_rex) { +static void emit_reg_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc, + u32 src, X64AsmOperand dst, int force_rex) { u8 buf[16]; u32 n = 0; if (size == 2u) buf[n++] = X64_OPSIZE_PFX; @@ -344,11 +387,12 @@ static void emit_reg_store_operand(MCEmitter* mc, u32 size, u8 opc, u32 src, buf[n++] = opc; n += x64_pack_mem_operand(buf + n, src, dst); mc->emit_bytes(mc, buf, n); + x64_emit_mem_reloc(d, mc, &dst, 0); } -static void emit_mov_store_operand(MCEmitter* mc, u32 size, u32 src, - X64AsmOperand dst, int force_rex) { - emit_reg_store_operand(mc, size, +static void emit_mov_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, + u32 src, X64AsmOperand dst, int force_rex) { + emit_reg_store_operand(d, mc, size, size == 1u ? 
X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R, src, dst, force_rex); } @@ -372,13 +416,19 @@ static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, n += x64_pack_rex_mem_operand(buf + n, size == 8u, 0, dst); buf[n++] = use_i8 ? opc8 : opc32; n += x64_pack_mem_operand(buf + n, sub, dst); - if (size == 1u) + u32 trailing; + if (size == 1u) { buf[n++] = (u8)imm; - else if (use_i8) + trailing = 1u; + } else if (use_i8) { buf[n++] = (u8)(i8)imm; - else + trailing = 1u; + } else { n += x64_put_u32le(buf + n, (u32)(i32)imm); + trailing = 4u; + } mc->emit_bytes(mc, buf, n); + x64_emit_mem_reloc(d, mc, &dst, trailing); } static void expect_comma(AsmDriver* d) { @@ -441,7 +491,7 @@ static __attribute__((unused)) void emit_movb_store_operand(AsmDriver* d, emit_mem_operand(mc, src.reg, dst.base, dst.disp); return; } - emit_mov_store_operand(mc, 1, src.reg, dst, 1); + emit_mov_store_operand(d, mc, 1, src.reg, dst, 1); } static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc, @@ -848,12 +898,12 @@ static void parse_alu_rr(X64ParseCtx* p) { if (p->width == 1u) emit_movb_store_operand(p->d, p->mc, src, dst); else - emit_mov_store_operand(p->mc, p->width, src.reg, dst, 0); + emit_mov_store_operand(p->d, p->mc, p->width, src.reg, dst, 0); return; } if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { - emit_mov_load_operand(p->mc, p->width, dst.reg, src); + emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src); return; } /* ALU reg → mem store (add/or/and/sub/xor/cmp %reg, mem): the reg-to-r/m @@ -861,7 +911,7 @@ static void parse_alu_rr(X64ParseCtx* p) { * opcode's W bit (e.g. ADD r/m,r 0x01 → r/m8,r8 0x00). */ if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) { u8 op = p->width == 1u ? 
(u8)(p->desc->opc[0] & ~1u) : p->desc->opc[0]; - emit_reg_store_operand(p->mc, p->width, op, src.reg, dst, + emit_reg_store_operand(p->d, p->mc, p->width, op, src.reg, dst, p->width == 1u && byte_reg_needs_rex(&src)); return; } @@ -903,11 +953,11 @@ static void parse_mov_rm_load(X64ParseCtx* p) { if (p->desc->opc[0] == 0x8Du) { if (src.kind != X64_ASM_OP_MEM || dst.kind != X64_ASM_OP_REG) asm_driver_panic(p->d, "x64 asm: lea form"); - emit_reg_mem_operand(p->mc, p->width, X64_OPC_LEA, dst.reg, src); + emit_reg_mem_operand(p->d, p->mc, p->width, X64_OPC_LEA, dst.reg, src); return; } if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { - emit_mov_load_operand(p->mc, p->width, dst.reg, src); + emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src); return; } if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) { @@ -1134,10 +1184,14 @@ static void parse_rel32_branch(X64ParseCtx* p) { asm_driver_parse_sym_expr(p->d, &sym, &off); if (sym == OBJ_SYM_NONE) asm_driver_panic(p->d, "x64 asm: symbolic branch target required"); - p->mc->emit_reloc_at( - p->mc, asm_driver_cur_section(p->d), disp_pos, - p->desc->fmt == X64_FMT_CALL_REL32 ? R_X64_PLT32 : R_PC32, sym, off - 4, - 1, 0); + /* A `@PLT` suffix forces the PLT32 reloc (the default for `call`); plain + * `jmp sym` uses PC32. */ + RelocKind dflt = p->desc->fmt == X64_FMT_CALL_REL32 ? 
R_X64_PLT32 : R_PC32; + RelocKind k = x64_parse_reloc_suffix(p->d, dflt); + if (k != R_X64_PLT32 && k != R_PC32) + asm_driver_panic(p->d, "x64 asm: only @PLT is valid on a branch target"); + p->mc->emit_reloc_at(p->mc, asm_driver_cur_section(p->d), disp_pos, k, sym, + off - 4, 1, 0); } static void parse_setcc(X64ParseCtx* p) { @@ -1436,11 +1490,11 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) { if (w == 1u) emit_movb_store_operand(d, mc, src, dst); else - emit_mov_store_operand(mc, w, src.reg, dst, 0); + emit_mov_store_operand(d, mc, w, src.reg, dst, 0); return; } if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) { - emit_mov_load_operand(mc, w, dst.reg, src); + emit_mov_load_operand(d, mc, w, dst.reg, src); return; } asm_driver_panic(d, "x64 asm: mov form"); diff --git a/test/asm/encode/x64_reloc_modifiers.expected.hex b/test/asm/encode/x64_reloc_modifiers.expected.hex @@ -0,0 +1 @@ +488d0500000000488b0d0000000048891500000000488b3500000000c7050000000007000000e800000000e900000000c3 diff --git a/test/asm/encode/x64_reloc_modifiers.s b/test/asm/encode/x64_reloc_modifiers.s @@ -0,0 +1,10 @@ +.text +t: + leaq sym(%rip), %rax + movq sym(%rip), %rcx + movq %rdx, sym(%rip) + movq sym@GOTPCREL(%rip), %rsi + movl $7, sym(%rip) + call foo@PLT + jmp bar@PLT + ret diff --git a/test/asm/encode/x64_reloc_modifiers.targets b/test/asm/encode/x64_reloc_modifiers.targets @@ -0,0 +1 @@ +x64 +\ No newline at end of file