commit f5a4f04e3bf6b4fd9bd8929bfc657c3bf7be9e28
parent 80ba21ae01667332bf937b70d209a0e73d690938
Author: Ryan Sepassi <rsepassi@gmail.com>
Date: Mon, 8 Jun 2026 13:03:23 -0700
Track 4: drop dead INTRIN_MEMCPY/INTRIN_MEMSET intrinsics
memcpy/memset are dedicated public ops (kit_cg_memcpy/_memset) that lower to the
copy_bytes/set_bytes CGTarget hooks -> CG_IR_AGG_COPY/AGG_SET -> the backends'
agg-copy/set paths (and IOP_AGG_COPY/SET in the interpreter). Only kit_cg_memmove
flows through the generic intrinsic hook (INTRIN_MEMMOVE). So the internal
IntrinKind values INTRIN_MEMCPY and INTRIN_MEMSET were never emitted anywhere — a
case of "two spellings of one behavior" (CODEGEN.md Principle 7). Remove them and the dead
switch arms across all six backends (x64/aa64/riscv native, wasm, c_target, interp);
the memmove-only shapes that shared an arm keep their overlap-safe path.
(The C frontend's private cg_adapter.h INTRIN_MEM* enum is a separate namespace,
left untouched.)
Green: lib/bin/rt, cg-api (interp memmove), libc 9/0, parse + parse-err 128/0,
smoke x64/rv64.
Diffstat:
7 files changed, 34 insertions(+), 244 deletions(-)
diff --git a/src/arch/aa64/native.c b/src/arch/aa64/native.c
@@ -3501,20 +3501,6 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
return;
}
break;
- case INTRIN_MEMCPY:
- if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
- args[1].kind != NATIVE_LOC_REG || args[2].kind != NATIVE_LOC_IMM)
- aa_panic(aa_of(t), "unsupported memory intrinsic operands");
- if (args[2].v.imm < 0 || args[2].v.imm > 0xffffffffll)
- aa_panic(aa_of(t), "unsupported memory intrinsic size");
- access.size = (u32)args[2].v.imm;
- access.align = 1u;
- dst_addr.base_kind = NATIVE_ADDR_BASE_REG;
- dst_addr.base.reg = args[0].v.reg;
- src_addr.base_kind = NATIVE_ADDR_BASE_REG;
- src_addr.base.reg = args[1].v.reg;
- aa_copy_bytes(t, dst_addr, src_addr, access);
- return;
case INTRIN_MEMMOVE: {
MCLabel forward = t->mc->label_new(t->mc);
MCLabel done = t->mc->label_new(t->mc);
@@ -3540,24 +3526,6 @@ static void aa_intrinsic(NativeTarget* t, IntrinKind kind,
t->mc->label_place(t->mc, done);
return;
}
- case INTRIN_MEMSET:
- if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
- args[2].kind != NATIVE_LOC_IMM)
- aa_panic(aa_of(t), "unsupported memset operands");
- if (args[2].v.imm < 0 || args[2].v.imm > 0xffffffffll)
- aa_panic(aa_of(t), "unsupported memset size");
- access.size = (u32)args[2].v.imm;
- access.align = 1u;
- dst_addr.base_kind = NATIVE_ADDR_BASE_REG;
- dst_addr.base.reg = args[0].v.reg;
- if (args[1].kind == NATIVE_LOC_IMM) {
- NativeLoc byte = aa_tmp_loc(builtin_id(KIT_CG_BUILTIN_I8), AA_TMP0);
- aa_emit_load_imm(t->mc, 0, AA_TMP0, args[1].v.imm & 0xff);
- aa_set_bytes(t, dst_addr, byte, access);
- } else {
- aa_set_bytes(t, dst_addr, args[1], access);
- }
- return;
case INTRIN_EXPECT:
case INTRIN_ASSUME_ALIGNED:
if (ndst == 1u && narg >= 1u) {
diff --git a/src/arch/c_target/c_emit.c b/src/arch/c_target/c_emit.c
@@ -2551,23 +2551,15 @@ void c_emit_intrinsic(CTarget* t, IntrinKind k, Operand* dsts, u32 ndst,
c_emit_local_assign_close(t);
return;
}
- case INTRIN_MEMCPY:
- case INTRIN_MEMMOVE:
- case INTRIN_MEMSET: {
- const char* fn = (k == INTRIN_MEMCPY) ? "__builtin_memcpy"
- : (k == INTRIN_MEMMOVE) ? "__builtin_memmove"
- : "__builtin_memset";
- cbuf_puts(&t->body, " ");
- cbuf_puts(&t->body, fn);
- cbuf_puts(&t->body, "(");
+ case INTRIN_MEMMOVE: {
+ cbuf_puts(&t->body, " __builtin_memmove(");
for (u32 i = 0; i < narg; ++i) {
if (i > 0) cbuf_puts(&t->body, ", ");
- /* The pointer operands (dst for all three; src for mem{cpy,move})
- * may be typed as a plain integer local when they come from address
- * arithmetic, which the C target declares as int64_t. The builtins
- * take void*, so cast explicitly to avoid -Wint-conversion. */
- int is_ptr_arg =
- (i == 0) || (i == 1 && (k == INTRIN_MEMCPY || k == INTRIN_MEMMOVE));
+ /* The pointer operands (dst and src) may be typed as a plain integer
+ * local when they come from address arithmetic, which the C target
+ * declares as int64_t. __builtin_memmove takes void*, so cast
+ * explicitly to avoid -Wint-conversion. */
+ int is_ptr_arg = (i == 0) || (i == 1);
if (is_ptr_arg) cbuf_puts(&t->body, "(void*)");
c_emit_operand(t, args[i]);
}
diff --git a/src/arch/riscv/native.c b/src/arch/riscv/native.c
@@ -3268,7 +3268,6 @@ static void rv_intrinsic(NativeTarget* t, IntrinKind kind,
}
return;
}
- case INTRIN_MEMCPY:
case INTRIN_MEMMOVE: {
u32 dr, sr, n;
if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
@@ -3279,64 +3278,7 @@ static void rv_intrinsic(NativeTarget* t, IntrinKind kind,
dr = loc_reg(args[0]);
sr = loc_reg(args[1]);
n = (u32)args[2].v.imm;
- rv_intrin_copy(v, mc, dr, sr, n, kind == INTRIN_MEMMOVE);
- return;
- }
- case INTRIN_MEMSET: {
- u32 dr, n, src;
- int wide = v->ptr_bytes == 8u; /* 8-byte sd stores only on rv64 */
- if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
- args[2].kind != NATIVE_LOC_IMM)
- rv_panic(a, "unsupported memset operands");
- if (args[2].v.imm < 0 || args[2].v.imm > 0xffffffffll)
- rv_panic(a, "unsupported memset size");
- dr = loc_reg(args[0]);
- n = (u32)args[2].v.imm;
- if (args[1].kind == NATIVE_LOC_IMM) {
- u32 byte = (u32)(args[1].v.imm & 0xffu);
- if (byte == 0) {
- src = RV_ZERO;
- } else {
- u64 b = byte;
- b |= b << 8;
- b |= b << 16;
- if (wide) b |= b << 32;
- rv_emit_load_imm(v, mc, 1, RV_TMP3, (i64)b);
- src = RV_TMP3;
- }
- } else {
- /* Replicate the low byte across the register width (4 or 8 bytes). */
- u32 rb = loc_reg(args[1]);
- rv64_emit32(mc, rv_andi(RV_TMP3, rb, 0xff));
- rv64_emit32(mc, rv_slli(RV_TMP2, RV_TMP3, 8));
- rv64_emit32(mc, rv_or(RV_TMP3, RV_TMP3, RV_TMP2));
- rv64_emit32(mc, rv_slli(RV_TMP2, RV_TMP3, 16));
- rv64_emit32(mc, rv_or(RV_TMP3, RV_TMP3, RV_TMP2));
- if (wide) {
- rv64_emit32(mc, rv_slli(RV_TMP2, RV_TMP3, 32));
- rv64_emit32(mc, rv_or(RV_TMP3, RV_TMP3, RV_TMP2));
- }
- src = RV_TMP3;
- }
- {
- u32 i = 0;
- while (wide && i + 8u <= n) {
- rv64_emit32(mc, rv_sd(src, dr, (i32)i));
- i += 8u;
- }
- while (i + 4u <= n) {
- rv64_emit32(mc, rv_sw(src, dr, (i32)i));
- i += 4u;
- }
- while (i + 2u <= n) {
- rv64_emit32(mc, rv_sh(src, dr, (i32)i));
- i += 2u;
- }
- while (i < n) {
- rv64_emit32(mc, rv_sb(src, dr, (i32)i));
- i += 1u;
- }
- }
+ rv_intrin_copy(v, mc, dr, sr, n, /*reverse (overlap-safe)=*/1);
return;
}
case INTRIN_CPU_NOP:
diff --git a/src/arch/wasm/emit.c b/src/arch/wasm/emit.c
@@ -1575,12 +1575,8 @@ static const char* intrin_name(IntrinKind k) {
return "__builtin_clz";
case INTRIN_BSWAP:
return "__builtin_bswap";
- case INTRIN_MEMCPY:
- return "memcpy";
case INTRIN_MEMMOVE:
return "memmove";
- case INTRIN_MEMSET:
- return "memset";
case INTRIN_PREFETCH:
return "__builtin_prefetch";
case INTRIN_ASSUME_ALIGNED:
@@ -1668,11 +1664,10 @@ void wasm_intrinsic(CGTarget* tg, IntrinKind k, Operand* dst, u32 ndst,
}
return;
- case INTRIN_MEMCPY:
case INTRIN_MEMMOVE: {
- /* memcpy/memmove both lower to memory.copy, which is spec-defined to
- * handle overlap correctly. CG forces (dst, src) to REG and passes
- * size as OPK_IMM (kit_cg_memmove). */
+ /* memmove lowers to memory.copy, which is spec-defined to handle overlap
+ * correctly. CG forces (dst, src) to REG and passes size as OPK_IMM
+ * (kit_cg_memmove). */
if (nargs != 3 || args[0].kind != OPK_REG || args[1].kind != OPK_REG) {
compiler_panic(t->c, cur_loc(t),
"wasm target: %s requires register pointers",
@@ -1699,31 +1694,6 @@ void wasm_intrinsic(CGTarget* tg, IntrinKind k, Operand* dst, u32 ndst,
return;
}
- case INTRIN_MEMSET: {
- if (nargs != 3 || args[0].kind != OPK_REG) {
- compiler_panic(t->c, cur_loc(t),
- "wasm target: memset requires a register dst pointer");
- return;
- }
- if (args[1].kind != OPK_IMM || args[2].kind != OPK_IMM) {
- compiler_panic(t->c, cur_loc(t),
- "wasm target: memset with non-constant byte/size is "
- "not yet supported");
- return;
- }
- ensure_linear_memory(t);
- AggregateAccess a;
- memset(&a, 0, sizeof a);
- a.size = (u32)args[2].v.imm;
- a.align = 1;
- WIR* w = wir_push(t);
- w->op = WIR_SET_BYTES;
- w->addr = args[0];
- wir_capture_operand(w, 0, args[1]);
- w->agg = a;
- return;
- }
-
case INTRIN_CLZ:
case INTRIN_CTZ:
case INTRIN_POPCOUNT:
diff --git a/src/arch/x64/native.c b/src/arch/x64/native.c
@@ -3535,9 +3535,8 @@ static void x64_intrinsic(NativeTarget* t, IntrinKind kind,
emit_movzx_r32_r8(mc, rovf, rovf);
return;
}
- case INTRIN_MEMCPY:
case INTRIN_MEMMOVE: {
- u32 dr, sr, n;
+ u32 dr, sr, n, i;
if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
args[1].kind != NATIVE_LOC_REG || args[2].kind != NATIVE_LOC_IMM)
x64_panic(a, "unsupported memory intrinsic operands");
@@ -3546,50 +3545,26 @@ static void x64_intrinsic(NativeTarget* t, IntrinKind kind,
dr = loc_reg(args[0]);
sr = loc_reg(args[1]);
n = (u32)args[2].v.imm;
- if (kind == INTRIN_MEMCPY) {
- u32 i = 0;
- while (i + 8u <= n) {
- emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- i += 8u;
- }
- while (i + 4u <= n) {
- emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- i += 4u;
- }
- while (i + 2u <= n) {
- emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- i += 2u;
- }
- while (i < n) {
- emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- i += 1u;
- }
- } else {
- u32 i = n;
- while (i >= 8u) {
- i -= 8u;
- emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- }
- while (i >= 4u) {
- i -= 4u;
- emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- }
- while (i >= 2u) {
- i -= 2u;
- emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- }
- while (i >= 1u) {
- i -= 1u;
- emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i);
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- }
+ i = n; /* copy high-to-low so an overlapping dst > src is safe */
+ while (i >= 8u) {
+ i -= 8u;
+ emit_mov_load(mc, 8, 0, X64_RAX, sr, (i32)i);
+ emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
+ }
+ while (i >= 4u) {
+ i -= 4u;
+ emit_mov_load(mc, 4, 0, X64_RAX, sr, (i32)i);
+ emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
+ }
+ while (i >= 2u) {
+ i -= 2u;
+ emit_mov_load(mc, 2, 0, X64_RAX, sr, (i32)i);
+ emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
+ }
+ while (i >= 1u) {
+ i -= 1u;
+ emit_mov_load(mc, 1, 0, X64_RAX, sr, (i32)i);
+ emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
}
return;
}
@@ -3619,50 +3594,6 @@ static void x64_intrinsic(NativeTarget* t, IntrinKind kind,
mc->emit_bytes(mc, &b, 1);
return;
}
- case INTRIN_MEMSET: {
- u32 dr, n;
- if (narg != 3u || args[0].kind != NATIVE_LOC_REG ||
- args[2].kind != NATIVE_LOC_IMM)
- x64_panic(a, "unsupported memset operands");
- if (args[2].v.imm < 0 || args[2].v.imm > 0xffffffffll)
- x64_panic(a, "unsupported memset size");
- dr = loc_reg(args[0]);
- n = (u32)args[2].v.imm;
- if (args[1].kind == NATIVE_LOC_IMM) {
- u8 byte = (u8)(args[1].v.imm & 0xffu);
- u64 b64 = byte;
- b64 |= b64 << 8;
- b64 |= b64 << 16;
- b64 |= b64 << 32;
- x64_emit_load_imm(mc, 1, X64_RAX, (i64)b64);
- } else {
- /* Broadcast low byte of a register via multiply by 0x0101010101010101.
- */
- x64_emit_load_imm(mc, 1, X64_R11, (i64)0x0101010101010101ll);
- emit_mov_rr(mc, 1, X64_RAX, loc_reg(args[1]));
- emit_imul_rr(mc, 1, X64_RAX, X64_R11);
- }
- {
- u32 i = 0;
- while (i + 8u <= n) {
- emit_mov_store(mc, 8, X64_RAX, dr, (i32)i);
- i += 8u;
- }
- while (i + 4u <= n) {
- emit_mov_store(mc, 4, X64_RAX, dr, (i32)i);
- i += 4u;
- }
- while (i + 2u <= n) {
- emit_mov_store(mc, 2, X64_RAX, dr, (i32)i);
- i += 2u;
- }
- while (i < n) {
- emit_mov_store(mc, 1, X64_RAX, dr, (i32)i);
- i += 1u;
- }
- }
- return;
- }
case INTRIN_FRAME_ADDRESS:
case INTRIN_RETURN_ADDRESS:
/* Walk the rbp frame-record chain. Every kit prologue keeps the rbp
diff --git a/src/cg/cgtarget.h b/src/cg/cgtarget.h
@@ -136,10 +136,9 @@ typedef enum IntrinKind {
INTRIN_CLZ,
INTRIN_BSWAP,
- /* memory */
- INTRIN_MEMCPY,
+ /* memory. memcpy/memset are the dedicated copy_bytes/set_bytes hooks
+ * (kit_cg_memcpy/_memset); only memmove flows through the intrinsic path. */
INTRIN_MEMMOVE,
- INTRIN_MEMSET,
INTRIN_PREFETCH,
INTRIN_ASSUME_ALIGNED,
diff --git a/src/interp/engine.c b/src/interp/engine.c
@@ -1568,24 +1568,12 @@ static int interp_intrinsic(InterpStack* st, InterpFunc* fn, u64* regs,
#define DST0 \
(aux->ndst > 0 && aux->dsts[0].kind == OPK_REG ? aux->dsts[0].v.reg : 0u)
switch (aux->kind) {
- case INTRIN_MEMCPY:
case INTRIN_MEMMOVE: {
u64 d = ARGV(0), s = ARGV(1), n = ARGV(2);
mem_copy(st, d, s, (u32)n);
if (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG) regs[DST0] = d;
return 1;
}
- case INTRIN_MEMSET: {
- u64 d = ARGV(0), byte = ARGV(1), n = ARGV(2);
- u8* h = interp_translate(p, d, (u32)n, PERM_W);
- if (!h) {
- fault(st, "memset: invalid memory");
- return 0;
- }
- memset(h, (int)(byte & 0xffu), (size_t)n);
- if (aux->ndst > 0 && aux->dsts[0].kind == OPK_REG) regs[DST0] = d;
- return 1;
- }
case INTRIN_POPCOUNT:
regs[DST0] = ipopcount(ARGV(0), AWID(0));
return 1;