commit 5b8c7a74d0fd81536a83f27676a6738bfd7a1be0
parent 9fd6b74d8e1db3d5afe450867e52e4ea6544456d
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Fri, 29 May 2026 15:39:10 -0700
x64 asm: parse sym(%rip), @PLT, @GOTPCREL relocation-operator operands
Adds GNU-as relocation-operator syntax to the x86-64 standalone assembler:
symbolic RIP-relative memory operands sym(%rip) (R_X86_64_PC32) and
sym@GOTPCREL(%rip) (R_X86_64_REX_GOTPCRELX), and an @PLT suffix on call/jmp
targets (R_X86_64_PLT32). The disp32 relocation is emitted at offset
mc->pos - 4 - trailing with addend off - 4 - trailing, where trailing is the
number of immediate bytes that follow the disp32. PC32 therefore yields the
rip-relative displacement to the end of the instruction (e.g.
movl $imm,sym(%rip) has a 4-byte immediate after the disp → addend -8).
Corpus x64_reloc_modifiers is byte- and reloc-identical to llvm-mc including
addends (verified via llvm-objdump -r).
Diffstat:
4 files changed, 93 insertions(+), 26 deletions(-)
diff --git a/src/arch/x64/asm.c b/src/arch/x64/asm.c
@@ -44,9 +44,13 @@ typedef struct X64AsmOperand {
u8 scale; /* MEM SIB: log2 of scale ∈ {0,1,2,3} → 1/2/4/8 */
u8 has_index; /* MEM: SIB index present */
u8 rip_relative; /* MEM: bare (%rip)/disp(%rip) form */
- u8 pad[1];
+ u8 has_reloc; /* MEM: symbolic disp carries a relocation */
+ u8 pad[3];
i64 imm;
i32 disp;
+ RelocKind reloc_kind; /* MEM: reloc on the disp32 (PC32 / REX_GOTPCRELX) */
+ ObjSymId reloc_sym; /* MEM: relocated symbol */
+ i64 reloc_off; /* MEM: user addend on the symbol */
} X64AsmOperand;
static int x64_reg_from_name(AsmDriver* d, Sym s, u32* reg_out, u32* width_out,
@@ -213,6 +217,21 @@ static void parse_mem_paren_body(AsmDriver* d, X64AsmOperand* op) {
asm_driver_panic(d, "x64 asm: expected register in memory operand");
}
+/* Consume an optional `@MOD` relocation suffix after a symbol and return the
+ * RelocKind it selects, or `dflt` when no suffix is present.
+ *
+ * d:    assembler driver whose token stream is read.
+ * dflt: kind returned unchanged when the next token is not '@'; in that case
+ *       no token is consumed.
+ *
+ * When '@' is present, it and the token after it are consumed and the name
+ * is compared (slice_eq_cstr) against:
+ *   PLT       -> R_X64_PLT32
+ *   GOTPCREL  -> R_X64_REX_GOTPCRELX
+ *   GOTPCRELX -> R_X64_GOTPCRELX
+ * A non-identifier after '@', or any other name, goes to asm_driver_panic.
+ *
+ * The helper has no knowledge of the instruction or operand it is parsing
+ * for, so any "is this suffix legal here" check is left to the caller.
+ * NOTE(review): `@GOTPCREL` always selects the REX_ variant, whether or not
+ * the instruction being assembled carries a REX prefix -- confirm this is
+ * intended for non-REX forms (e.g. a 32-bit load through the GOT). */
+static RelocKind x64_parse_reloc_suffix(AsmDriver* d, RelocKind dflt) {
+ if (!asm_driver_tok_is_punct(asm_driver_peek(d), '@')) return dflt; /* no suffix: nothing consumed */
+ (void)asm_driver_next(d); /* '@' */
+ AsmTok n = asm_driver_next(d); /* the modifier name */
+ if (n.kind != ASM_TOK_IDENT)
+ asm_driver_panic(d, "x64 asm: expected relocation name after '@'");
+ Slice s = pool_slice(asm_driver_pool(d), n.v.ident); /* text of the interned identifier */
+ if (slice_eq_cstr(s, "PLT")) return R_X64_PLT32;
+ if (slice_eq_cstr(s, "GOTPCREL")) return R_X64_REX_GOTPCRELX;
+ if (slice_eq_cstr(s, "GOTPCRELX")) return R_X64_GOTPCRELX;
+ asm_driver_panic(d, "x64 asm: unsupported relocation suffix"); /* no return follows: presumably panic does not return -- confirm */
+}
+
static X64AsmOperand parse_operand(AsmDriver* d) {
X64AsmOperand op;
AsmTok t;
@@ -269,13 +288,36 @@ static X64AsmOperand parse_operand(AsmDriver* d) {
op.kind = X64_ASM_OP_MEM;
op.disp = 0;
if (!asm_driver_tok_is_punct(t, '(')) {
- op.disp = (i32)asm_driver_parse_const(d);
+ /* A symbolic displacement (`sym(%rip)`, `sym@GOTPCREL(%rip)`) becomes a
+ * relocation; a numeric displacement stays literal. */
+ if (asm_driver_peek(d).kind == ASM_TOK_IDENT) {
+ asm_driver_parse_sym_expr(d, &op.reloc_sym, &op.reloc_off);
+ op.reloc_kind = x64_parse_reloc_suffix(d, R_PC32);
+ op.has_reloc = 1;
+ } else {
+ op.disp = (i32)asm_driver_parse_const(d);
+ }
}
asm_driver_expect_punct(d, '(', "'(' in x64 memory operand");
parse_mem_paren_body(d, &op);
+ if (op.has_reloc && !op.rip_relative)
+ asm_driver_panic(d, "x64 asm: symbolic memory displacement requires (%rip)");
return op;
}
+/* Emit the relocation a symbolic `(%rip)` memory operand carries, if any. The
+ * disp32 field is the last 4 bytes of the instruction except for an immediate
+ * store, where `trailing` immediate bytes follow it. R_X86_64_PC32-style
+ * relocs use addend (off - 4 - trailing) so S+A-P yields the rip-relative
+ * displacement to the end of the instruction.
+ *
+ * Must be called after the instruction's bytes have been emitted: the disp32
+ * offset is found by stepping back from the emitter's current position.
+ *
+ * d:        assembler driver; supplies the current section.
+ * mc:       emitter that already holds the instruction bytes.
+ * m:        memory operand; a no-op unless m->has_reloc is set.
+ * trailing: number of bytes emitted after the disp32 (0 when the disp32 ends
+ *           the instruction).
+ *
+ * The same position and addend arithmetic is applied to whatever kind is
+ * stored in m->reloc_kind; there is no per-kind branching here. */
+static void x64_emit_mem_reloc(AsmDriver* d, MCEmitter* mc,
+ const X64AsmOperand* m, u32 trailing) {
+ if (!m->has_reloc) return; /* numeric displacement: nothing to relocate */
+ u32 disp_pos = mc->pos(mc) - 4u - trailing; /* start of the 4-byte disp32 field */
+ mc->emit_reloc_at(mc, asm_driver_cur_section(d), disp_pos, m->reloc_kind,
+ m->reloc_sym, m->reloc_off - 4 - (i64)trailing, 1, 0); /* NOTE(review): meaning of the final `1, 0` arguments is defined by emit_reloc_at, not visible here */
+}
+
static u32 x64_pack_rex_mem_operand(u8* out, int w, u32 reg,
X64AsmOperand mem) {
/* RIP-relative carries no base/index registers (rm=101, no SIB). */
@@ -309,8 +351,8 @@ static u32 x64_pack_mem_operand(u8* out, u32 reg, X64AsmOperand mem) {
/* reg ← mem with an explicit single-byte opcode (e.g. 0x8B MOV, 0x8D LEA).
* Routes the full memory-operand variety (plain / SIB / RIP / segment)
* through the shared pack helpers. */
-static void emit_reg_mem_operand(MCEmitter* mc, u32 size, u8 opc, u32 dst,
- X64AsmOperand src) {
+static void emit_reg_mem_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc,
+ u32 dst, X64AsmOperand src) {
u8 buf[16];
u32 n = 0;
if (size == 2u) buf[n++] = X64_OPSIZE_PFX;
@@ -319,19 +361,20 @@ static void emit_reg_mem_operand(MCEmitter* mc, u32 size, u8 opc, u32 dst,
buf[n++] = opc;
n += x64_pack_mem_operand(buf + n, dst, src);
mc->emit_bytes(mc, buf, n);
+ x64_emit_mem_reloc(d, mc, &src, 0);
}
-static void emit_mov_load_operand(MCEmitter* mc, u32 size, u32 dst,
+static void emit_mov_load_operand(AsmDriver* d, MCEmitter* mc, u32 size, u32 dst,
X64AsmOperand src) {
- emit_reg_mem_operand(mc, size, X64_OPC_MOV_R_RM, dst, src);
+ emit_reg_mem_operand(d, mc, size, X64_OPC_MOV_R_RM, dst, src);
}
/* reg → mem store with an explicit reg-to-r/m opcode. Used by MOV
* (0x89/0x88) and the ALU /r stores (ADD 0x01, OR 0x09, AND 0x21,
* SUB 0x29, XOR 0x31, CMP 0x39). The register operand occupies the
* ModR/M reg field; the memory operand the r/m field. */
-static void emit_reg_store_operand(MCEmitter* mc, u32 size, u8 opc, u32 src,
- X64AsmOperand dst, int force_rex) {
+static void emit_reg_store_operand(AsmDriver* d, MCEmitter* mc, u32 size, u8 opc,
+ u32 src, X64AsmOperand dst, int force_rex) {
u8 buf[16];
u32 n = 0;
if (size == 2u) buf[n++] = X64_OPSIZE_PFX;
@@ -344,11 +387,12 @@ static void emit_reg_store_operand(MCEmitter* mc, u32 size, u8 opc, u32 src,
buf[n++] = opc;
n += x64_pack_mem_operand(buf + n, src, dst);
mc->emit_bytes(mc, buf, n);
+ x64_emit_mem_reloc(d, mc, &dst, 0);
}
-static void emit_mov_store_operand(MCEmitter* mc, u32 size, u32 src,
- X64AsmOperand dst, int force_rex) {
- emit_reg_store_operand(mc, size,
+static void emit_mov_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
+ u32 src, X64AsmOperand dst, int force_rex) {
+ emit_reg_store_operand(d, mc, size,
size == 1u ? X64_OPC_MOV_RM_R8 : X64_OPC_MOV_RM_R, src,
dst, force_rex);
}
@@ -372,13 +416,19 @@ static void emit_rm_imm_store_operand(AsmDriver* d, MCEmitter* mc, u32 size,
n += x64_pack_rex_mem_operand(buf + n, size == 8u, 0, dst);
buf[n++] = use_i8 ? opc8 : opc32;
n += x64_pack_mem_operand(buf + n, sub, dst);
- if (size == 1u)
+ u32 trailing;
+ if (size == 1u) {
buf[n++] = (u8)imm;
- else if (use_i8)
+ trailing = 1u;
+ } else if (use_i8) {
buf[n++] = (u8)(i8)imm;
- else
+ trailing = 1u;
+ } else {
n += x64_put_u32le(buf + n, (u32)(i32)imm);
+ trailing = 4u;
+ }
mc->emit_bytes(mc, buf, n);
+ x64_emit_mem_reloc(d, mc, &dst, trailing);
}
static void expect_comma(AsmDriver* d) {
@@ -441,7 +491,7 @@ static __attribute__((unused)) void emit_movb_store_operand(AsmDriver* d,
emit_mem_operand(mc, src.reg, dst.base, dst.disp);
return;
}
- emit_mov_store_operand(mc, 1, src.reg, dst, 1);
+ emit_mov_store_operand(d, mc, 1, src.reg, dst, 1);
}
static __attribute__((unused)) void emit_rm_imm(AsmDriver* d, MCEmitter* mc,
@@ -848,12 +898,12 @@ static void parse_alu_rr(X64ParseCtx* p) {
if (p->width == 1u)
emit_movb_store_operand(p->d, p->mc, src, dst);
else
- emit_mov_store_operand(p->mc, p->width, src.reg, dst, 0);
+ emit_mov_store_operand(p->d, p->mc, p->width, src.reg, dst, 0);
return;
}
if (p->desc->opc[0] == 0x89u && src.kind == X64_ASM_OP_MEM &&
dst.kind == X64_ASM_OP_REG) {
- emit_mov_load_operand(p->mc, p->width, dst.reg, src);
+ emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src);
return;
}
/* ALU reg → mem store (add/or/and/sub/xor/cmp %reg, mem): the reg-to-r/m
@@ -861,7 +911,7 @@ static void parse_alu_rr(X64ParseCtx* p) {
* opcode's W bit (e.g. ADD r/m,r 0x01 → r/m8,r8 0x00). */
if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_MEM) {
u8 op = p->width == 1u ? (u8)(p->desc->opc[0] & ~1u) : p->desc->opc[0];
- emit_reg_store_operand(p->mc, p->width, op, src.reg, dst,
+ emit_reg_store_operand(p->d, p->mc, p->width, op, src.reg, dst,
p->width == 1u && byte_reg_needs_rex(&src));
return;
}
@@ -903,11 +953,11 @@ static void parse_mov_rm_load(X64ParseCtx* p) {
if (p->desc->opc[0] == 0x8Du) {
if (src.kind != X64_ASM_OP_MEM || dst.kind != X64_ASM_OP_REG)
asm_driver_panic(p->d, "x64 asm: lea form");
- emit_reg_mem_operand(p->mc, p->width, X64_OPC_LEA, dst.reg, src);
+ emit_reg_mem_operand(p->d, p->mc, p->width, X64_OPC_LEA, dst.reg, src);
return;
}
if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
- emit_mov_load_operand(p->mc, p->width, dst.reg, src);
+ emit_mov_load_operand(p->d, p->mc, p->width, dst.reg, src);
return;
}
if (src.kind == X64_ASM_OP_REG && dst.kind == X64_ASM_OP_REG) {
@@ -1134,10 +1184,14 @@ static void parse_rel32_branch(X64ParseCtx* p) {
asm_driver_parse_sym_expr(p->d, &sym, &off);
if (sym == OBJ_SYM_NONE)
asm_driver_panic(p->d, "x64 asm: symbolic branch target required");
- p->mc->emit_reloc_at(
- p->mc, asm_driver_cur_section(p->d), disp_pos,
- p->desc->fmt == X64_FMT_CALL_REL32 ? R_X64_PLT32 : R_PC32, sym, off - 4,
- 1, 0);
+ /* A `@PLT` suffix forces the PLT32 reloc (the default for `call`); plain
+ * `jmp sym` uses PC32. */
+ RelocKind dflt = p->desc->fmt == X64_FMT_CALL_REL32 ? R_X64_PLT32 : R_PC32;
+ RelocKind k = x64_parse_reloc_suffix(p->d, dflt);
+ if (k != R_X64_PLT32 && k != R_PC32)
+ asm_driver_panic(p->d, "x64 asm: only @PLT is valid on a branch target");
+ p->mc->emit_reloc_at(p->mc, asm_driver_cur_section(p->d), disp_pos, k, sym,
+ off - 4, 1, 0);
}
static void parse_setcc(X64ParseCtx* p) {
@@ -1436,11 +1490,11 @@ static void x64_arch_asm_insn(ArchAsm* base, AsmDriver* d, Sym mnemonic) {
if (w == 1u)
emit_movb_store_operand(d, mc, src, dst);
else
- emit_mov_store_operand(mc, w, src.reg, dst, 0);
+ emit_mov_store_operand(d, mc, w, src.reg, dst, 0);
return;
}
if (src.kind == X64_ASM_OP_MEM && dst.kind == X64_ASM_OP_REG) {
- emit_mov_load_operand(mc, w, dst.reg, src);
+ emit_mov_load_operand(d, mc, w, dst.reg, src);
return;
}
asm_driver_panic(d, "x64 asm: mov form");
diff --git a/test/asm/encode/x64_reloc_modifiers.expected.hex b/test/asm/encode/x64_reloc_modifiers.expected.hex
@@ -0,0 +1 @@
+488d0500000000488b0d0000000048891500000000488b3500000000c7050000000007000000e800000000e900000000c3
diff --git a/test/asm/encode/x64_reloc_modifiers.s b/test/asm/encode/x64_reloc_modifiers.s
@@ -0,0 +1,10 @@
+.text
+t:
+ leaq sym(%rip), %rax
+ movq sym(%rip), %rcx
+ movq %rdx, sym(%rip)
+ movq sym@GOTPCREL(%rip), %rsi
+ movl $7, sym(%rip)
+ call foo@PLT
+ jmp bar@PLT
+ ret
diff --git a/test/asm/encode/x64_reloc_modifiers.targets b/test/asm/encode/x64_reloc_modifiers.targets
@@ -0,0 +1 @@
+x64
+\ No newline at end of file